Refactor everything
Huge dump of refactored code. Still in the middle of the changes that are to be squashed later in a one huge monster commit, because there is no value in anything atomic here.
This commit is contained in:
parent
5bd9aadc55
commit
588790b9b4
287
Cargo.lock
generated
287
Cargo.lock
generated
@ -78,6 +78,12 @@ version = "2.9.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bumpalo"
|
||||||
|
version = "3.19.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytes"
|
name = "bytes"
|
||||||
version = "1.10.1"
|
version = "1.10.1"
|
||||||
@ -96,6 +102,73 @@ version = "0.2.4"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31"
|
checksum = "1a48563284b67c003ba0fb7243c87fab68885e1532c605704228a80238512e31"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "convert_case"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "633458d4ef8c78b72454de2d54fd6ab2e60f9e02be22f3c6104cdc8a4e0fceb9"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-segmentation",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "core-foundation-sys"
|
||||||
|
version = "0.8.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-deque"
|
||||||
|
version = "0.8.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-epoch",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-epoch"
|
||||||
|
version = "0.9.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossbeam-utils"
|
||||||
|
version = "0.8.21"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm"
|
||||||
|
version = "0.29.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.9.1",
|
||||||
|
"crossterm_winapi",
|
||||||
|
"derive_more",
|
||||||
|
"document-features",
|
||||||
|
"mio",
|
||||||
|
"parking_lot",
|
||||||
|
"rustix",
|
||||||
|
"signal-hook",
|
||||||
|
"signal-hook-mio",
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "crossterm_winapi"
|
||||||
|
version = "0.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dashmap"
|
name = "dashmap"
|
||||||
version = "5.5.3"
|
version = "5.5.3"
|
||||||
@ -109,13 +182,38 @@ dependencies = [
|
|||||||
"parking_lot_core",
|
"parking_lot_core",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_more"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d751e9e49156b02b44f9c1815bcb94b984cdcc4396ecc32521c739452808b134"
|
||||||
|
dependencies = [
|
||||||
|
"derive_more-impl",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_more-impl"
|
||||||
|
version = "2.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "799a97264921d8623a957f6c3b9011f3b5492f557bbb7a5a19b7fa6d06ba8dcb"
|
||||||
|
dependencies = [
|
||||||
|
"convert_case",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"rustc_version",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dev_tests"
|
name = "dev_tests"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"chardet",
|
"chardet",
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
|
"is-terminal",
|
||||||
|
"libc",
|
||||||
"rottlib",
|
"rottlib",
|
||||||
|
"sysinfo",
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -130,6 +228,21 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "document-features"
|
||||||
|
version = "0.2.12"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
|
||||||
|
dependencies = [
|
||||||
|
"litrs",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.15.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "encoding_rs"
|
name = "encoding_rs"
|
||||||
version = "0.8.35"
|
version = "0.8.35"
|
||||||
@ -139,6 +252,16 @@ dependencies = [
|
|||||||
"cfg-if",
|
"cfg-if",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "errno"
|
||||||
|
version = "0.3.14"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fnv"
|
name = "fnv"
|
||||||
version = "1.0.7"
|
version = "1.0.7"
|
||||||
@ -243,6 +366,12 @@ version = "0.14.5"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "hermit-abi"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httparse"
|
name = "httparse"
|
||||||
version = "1.10.1"
|
version = "1.10.1"
|
||||||
@ -367,6 +496,17 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is-terminal"
|
||||||
|
version = "0.4.16"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
|
||||||
|
dependencies = [
|
||||||
|
"hermit-abi",
|
||||||
|
"libc",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "itoa"
|
name = "itoa"
|
||||||
version = "1.0.15"
|
version = "1.0.15"
|
||||||
@ -385,12 +525,24 @@ version = "0.2.174"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "linux-raw-sys"
|
||||||
|
version = "0.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "litemap"
|
name = "litemap"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "litrs"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lock_api"
|
name = "lock_api"
|
||||||
version = "0.4.13"
|
version = "0.4.13"
|
||||||
@ -401,6 +553,12 @@ dependencies = [
|
|||||||
"scopeguard",
|
"scopeguard",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "log"
|
||||||
|
version = "0.4.29"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "logos"
|
name = "logos"
|
||||||
version = "0.15.0"
|
version = "0.15.0"
|
||||||
@ -470,10 +628,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
|
checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc",
|
"libc",
|
||||||
|
"log",
|
||||||
"wasi",
|
"wasi",
|
||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ntapi"
|
||||||
|
version = "0.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "object"
|
name = "object"
|
||||||
version = "0.36.7"
|
version = "0.36.7"
|
||||||
@ -577,6 +745,26 @@ dependencies = [
|
|||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon"
|
||||||
|
version = "1.11.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
"rayon-core",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rayon-core"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
||||||
|
dependencies = [
|
||||||
|
"crossbeam-deque",
|
||||||
|
"crossbeam-utils",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "redox_syscall"
|
name = "redox_syscall"
|
||||||
version = "0.5.17"
|
version = "0.5.17"
|
||||||
@ -596,6 +784,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
|||||||
name = "rottlib"
|
name = "rottlib"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"backtrace",
|
||||||
|
"bumpalo",
|
||||||
|
"crossterm",
|
||||||
"logos",
|
"logos",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -623,6 +814,19 @@ dependencies = [
|
|||||||
"semver",
|
"semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustix"
|
||||||
|
version = "1.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.9.1",
|
||||||
|
"errno",
|
||||||
|
"libc",
|
||||||
|
"linux-raw-sys",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ryu"
|
name = "ryu"
|
||||||
version = "1.0.20"
|
version = "1.0.20"
|
||||||
@ -693,6 +897,27 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook"
|
||||||
|
version = "0.3.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d881a16cf4426aa584979d30bd82cb33429027e42122b169753d6ef1085ed6e2"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"signal-hook-registry",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "signal-hook-mio"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b75a19a7a740b25bc7944bdee6172368f988763b744e3d4dfe753f6b4ece40cc"
|
||||||
|
dependencies = [
|
||||||
|
"libc",
|
||||||
|
"mio",
|
||||||
|
"signal-hook",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "signal-hook-registry"
|
name = "signal-hook-registry"
|
||||||
version = "1.4.5"
|
version = "1.4.5"
|
||||||
@ -752,6 +977,21 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sysinfo"
|
||||||
|
version = "0.30.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3"
|
||||||
|
dependencies = [
|
||||||
|
"cfg-if",
|
||||||
|
"core-foundation-sys",
|
||||||
|
"libc",
|
||||||
|
"ntapi",
|
||||||
|
"once_cell",
|
||||||
|
"rayon",
|
||||||
|
"windows",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tinystr"
|
name = "tinystr"
|
||||||
version = "0.8.1"
|
version = "0.8.1"
|
||||||
@ -903,6 +1143,12 @@ version = "1.0.18"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-segmentation"
|
||||||
|
version = "1.13.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "url"
|
name = "url"
|
||||||
version = "2.5.4"
|
version = "2.5.4"
|
||||||
@ -937,6 +1183,22 @@ version = "0.11.1+wasi-snapshot-preview1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi"
|
||||||
|
version = "0.3.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
|
||||||
|
dependencies = [
|
||||||
|
"winapi-i686-pc-windows-gnu",
|
||||||
|
"winapi-x86_64-pc-windows-gnu",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-i686-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "winapi-util"
|
name = "winapi-util"
|
||||||
version = "0.1.9"
|
version = "0.1.9"
|
||||||
@ -946,6 +1208,31 @@ dependencies = [
|
|||||||
"windows-sys",
|
"windows-sys",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "winapi-x86_64-pc-windows-gnu"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows"
|
||||||
|
version = "0.52.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
|
||||||
|
dependencies = [
|
||||||
|
"windows-core",
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-core"
|
||||||
|
version = "0.52.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "windows-sys"
|
name = "windows-sys"
|
||||||
version = "0.59.0"
|
version = "0.59.0"
|
||||||
|
|||||||
@ -3,22 +3,21 @@ name = "dev_tests"
|
|||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[[bin]]
|
|
||||||
name = "dump_tokens"
|
|
||||||
path = "src/dump_tokens.rs"
|
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "uc_lexer_verify"
|
name = "uc_lexer_verify"
|
||||||
path = "src/uc_lexer_verify.rs"
|
path = "src/uc_lexer_verify.rs"
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "temp"
|
name = "verify_expr"
|
||||||
path = "src/temp.rs"
|
path = "src/verify_expr.rs"
|
||||||
|
|
||||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
rottlib = { version = "0", path = "../rottlib", features = ["debug"] }
|
rottlib = { version = "0", path = "../rottlib", features = ["debug"] }
|
||||||
|
is-terminal = "0.4"
|
||||||
|
libc = "0.2"
|
||||||
|
sysinfo = "0.30"
|
||||||
walkdir="2.5"
|
walkdir="2.5"
|
||||||
encoding_rs="0.8"
|
encoding_rs="0.8"
|
||||||
chardet="0.2"
|
chardet="0.2"
|
||||||
|
|||||||
@ -1,76 +0,0 @@
|
|||||||
use std::{
|
|
||||||
fs,
|
|
||||||
path::{Path, PathBuf},
|
|
||||||
};
|
|
||||||
|
|
||||||
use encoding_rs::{Encoding, UTF_8};
|
|
||||||
use rottlib::lexer::{DebugTools, TokenizedFile};
|
|
||||||
|
|
||||||
/// Recursively search `root` for the first file whose *basename* matches
|
|
||||||
/// `needle` (case-sensitive).
|
|
||||||
///
|
|
||||||
/// Returns the absolute path.
|
|
||||||
fn find_file(root: &Path, needle: &str) -> Option<PathBuf> {
|
|
||||||
for entry in walkdir::WalkDir::new(root)
|
|
||||||
.into_iter()
|
|
||||||
.filter_map(Result::ok)
|
|
||||||
{
|
|
||||||
let path = entry.path();
|
|
||||||
if path.is_file() && (path.file_name().and_then(|name| name.to_str()) == Some(needle)) {
|
|
||||||
return fs::canonicalize(path).ok();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// CLI: `dump_tokens <root_dir> <file_name>` - searches for `<file_name>`
|
|
||||||
/// recursively inside `<root_dir>`.
|
|
||||||
///
|
|
||||||
/// This utility takes *root directory* and *file name* instead of the full path
|
|
||||||
/// to help us avoid searching for them typing names out:
|
|
||||||
///
|
|
||||||
/// - We know where all the sources are;
|
|
||||||
/// - We usually just know the name of the file that is being problematic.
|
|
||||||
fn main() {
|
|
||||||
let mut args = std::env::args().skip(1);
|
|
||||||
let root_dir = args.next().unwrap_or_else(|| {
|
|
||||||
eprintln!("Usage: inspect_uc <root_dir> <file_name>");
|
|
||||||
std::process::exit(1);
|
|
||||||
});
|
|
||||||
let file_name = args.next().unwrap_or_else(|| {
|
|
||||||
eprintln!("Usage: inspect_uc <root_dir> <file_name>");
|
|
||||||
std::process::exit(1);
|
|
||||||
});
|
|
||||||
|
|
||||||
let root = PathBuf::from(&root_dir);
|
|
||||||
if !root.exists() {
|
|
||||||
eprintln!("Root directory '{root_dir}' does not exist.");
|
|
||||||
std::process::exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
let found_path = find_file(&root, &file_name).map_or_else(
|
|
||||||
|| {
|
|
||||||
eprintln!("File '{file_name}' not found under '{root_dir}'.");
|
|
||||||
std::process::exit(1);
|
|
||||||
},
|
|
||||||
|path| path,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Read & decode
|
|
||||||
let raw_bytes = match fs::read(&found_path) {
|
|
||||||
Ok(sources) => sources,
|
|
||||||
Err(error) => {
|
|
||||||
eprintln!("Could not read {}: {error}", found_path.display());
|
|
||||||
std::process::exit(1);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let (encoding_label, _, _) = chardet::detect(&raw_bytes);
|
|
||||||
let encoding = Encoding::for_label(encoding_label.as_bytes()).unwrap_or(UTF_8);
|
|
||||||
let (decoded_str, _, _) = encoding.decode(&raw_bytes);
|
|
||||||
|
|
||||||
let source_text = decoded_str.to_string();
|
|
||||||
let tokenized_file = TokenizedFile::from_str(&source_text);
|
|
||||||
|
|
||||||
tokenized_file.dump_debug_layout();
|
|
||||||
}
|
|
||||||
14
dev_tests/src/pretty.rs
Normal file
14
dev_tests/src/pretty.rs
Normal file
@ -0,0 +1,14 @@
|
|||||||
|
// diagnostics_render.rs
|
||||||
|
|
||||||
|
use rottlib::diagnostics::{Diagnostic};
|
||||||
|
use rottlib::lexer::TokenizedFile;
|
||||||
|
|
||||||
|
pub fn render_diagnostic(
|
||||||
|
diag: &Diagnostic,
|
||||||
|
_file: &TokenizedFile,
|
||||||
|
file_name: Option<&str>,
|
||||||
|
colors: bool,
|
||||||
|
) -> String {
|
||||||
|
diag.render(_file, file_name.unwrap_or("<default>"));
|
||||||
|
"fuck it".to_string()
|
||||||
|
}
|
||||||
@ -1,129 +0,0 @@
|
|||||||
//! src/main.rs
|
|
||||||
//! --------------------------------------------
|
|
||||||
//! Build & run:
|
|
||||||
//! cargo run
|
|
||||||
//! --------------------------------------------
|
|
||||||
|
|
||||||
use std::env;
|
|
||||||
use std::fs;
|
|
||||||
use std::io::{self, Read, Write};
|
|
||||||
use std::path::Path;
|
|
||||||
|
|
||||||
use rottlib::arena::Arena;
|
|
||||||
use rottlib::lexer::TokenizedFile;
|
|
||||||
use rottlib::parser::{ParseError, Parser, pretty::ExprTree};
|
|
||||||
|
|
||||||
/*
|
|
||||||
- Convenient array definitions: [1, 3, 5, 2, 4]
|
|
||||||
- Boolean dynamic arrays
|
|
||||||
- Structures in default properties
|
|
||||||
- Auto conversion of arrays into strings
|
|
||||||
- Making 'var' and 'local' unnecessary
|
|
||||||
- Allowing variable creation in 'for' loops
|
|
||||||
- Allowing variable creation at any place inside a function
|
|
||||||
- Default parameters for functions
|
|
||||||
- Function overloading?
|
|
||||||
- repeat/until
|
|
||||||
- The syntax of the default properties block is pretty strict for an arcane reason. Particularly adding spaces before or after the "=" will lead to errors in pre-UT2003 versions.
|
|
||||||
- Scopes
|
|
||||||
- different names for variables and in config file
|
|
||||||
- anonymous pairs (objects?) and value destruction
|
|
||||||
>>> AST > HIR > MIR > byte code
|
|
||||||
*/
|
|
||||||
|
|
||||||
/// Closest plan:
|
|
||||||
/// - Add top-level declaration parsing
|
|
||||||
/// - Handle pretty.rs shit somehow
|
|
||||||
/// - COMMITS
|
|
||||||
/// ---------------------------------------
|
|
||||||
/// - Add fancy error reporting
|
|
||||||
/// - Make a fancy REPL
|
|
||||||
/// - Add evaluation
|
|
||||||
///
|
|
||||||
/// WARNINGS:
|
|
||||||
/// - Empty code/switch blocks
|
|
||||||
|
|
||||||
fn parse_and_print(src: &str) -> Result<(), ParseError> {
|
|
||||||
let tokenized = TokenizedFile::from_str(src);
|
|
||||||
let arena = Arena::new();
|
|
||||||
let mut parser = Parser::new(&tokenized, &arena);
|
|
||||||
|
|
||||||
let expr = parser.parse_expression(); // ArenaNode<Expression>
|
|
||||||
println!("{}", ExprTree(&*expr)); // if ArenaNode<Deref>
|
|
||||||
// or: println!("{}", ExprTree(expr.as_ref())); // if no Deref
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn repl_once() -> Result<(), ParseError> {
|
|
||||||
print!("Enter an statement > ");
|
|
||||||
io::stdout().flush().unwrap();
|
|
||||||
|
|
||||||
let mut input = String::new();
|
|
||||||
if io::stdin().read_line(&mut input).is_err() {
|
|
||||||
eprintln!("failed to read input");
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
if input.trim().is_empty() {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
parse_and_print(&input)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_stdin_all() -> io::Result<String> {
|
|
||||||
let mut buf = String::new();
|
|
||||||
io::stdin().read_to_string(&mut buf)?;
|
|
||||||
Ok(buf)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_file_to_string(path: &Path) -> io::Result<String> {
|
|
||||||
fs::read_to_string(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() -> Result<(), ParseError> {
|
|
||||||
// Accept a single positional arg as the input path.
|
|
||||||
// "-" means read all of stdin.
|
|
||||||
let mut args = env::args().skip(1);
|
|
||||||
|
|
||||||
if let Some(arg1) = args.next() {
|
|
||||||
if arg1 == "-h" || arg1 == "--help" {
|
|
||||||
println!("Usage:");
|
|
||||||
println!(
|
|
||||||
" {} # REPL",
|
|
||||||
env::args().next().unwrap_or_else(|| "prog".into())
|
|
||||||
);
|
|
||||||
println!(
|
|
||||||
" {} <file> # parse file",
|
|
||||||
env::args().next().unwrap_or_else(|| "prog".into())
|
|
||||||
);
|
|
||||||
println!(
|
|
||||||
" {} - # read source from stdin",
|
|
||||||
env::args().next().unwrap_or_else(|| "prog".into())
|
|
||||||
);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
|
|
||||||
if arg1 == "-" {
|
|
||||||
match read_stdin_all() {
|
|
||||||
Ok(src) => return parse_and_print(&src),
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("stdin read error: {}", e);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let path = Path::new(&arg1);
|
|
||||||
match read_file_to_string(path) {
|
|
||||||
Ok(src) => return parse_and_print(&src),
|
|
||||||
Err(e) => {
|
|
||||||
eprintln!("file read error ({}): {}", path.display(), e);
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// No filename provided -> keep REPL behavior
|
|
||||||
repl_once()
|
|
||||||
}
|
|
||||||
@ -1,122 +1,341 @@
|
|||||||
use std::{collections::HashSet, fs, path::PathBuf};
|
#![allow(
|
||||||
|
clippy::all,
|
||||||
|
clippy::pedantic,
|
||||||
|
clippy::nursery,
|
||||||
|
clippy::cargo,
|
||||||
|
clippy::restriction
|
||||||
|
)]
|
||||||
|
|
||||||
use rottlib::lexer::{DebugTools, TokenizedFile};
|
use std::{
|
||||||
|
collections::HashSet,
|
||||||
|
fs,
|
||||||
|
io::{self, Write},
|
||||||
|
path::PathBuf,
|
||||||
|
time::Instant,
|
||||||
|
};
|
||||||
|
|
||||||
/// Read `ignore.txt` (one path per line, `#` for comments) from root directory
|
use encoding_rs::Encoding;
|
||||||
/// and turn it into a canonicalized [`HashSet<PathBuf>`].
|
use rottlib::diagnostics::Diagnostic as Diag;
|
||||||
|
use rottlib::lexer::TokenizedFile;
|
||||||
|
use rottlib::parser::Parser;
|
||||||
|
|
||||||
|
mod pretty;
|
||||||
|
|
||||||
|
// ---------- CONFIG ----------
|
||||||
|
const FILE_LIMIT: usize = 10000; // cap on files scanned
|
||||||
|
const DIAG_SHOW_FIRST: usize = 12; // show first N diagnostics
|
||||||
|
const DIAG_SHOW_LAST: usize = 12; // show last N diagnostics
|
||||||
|
/// If true, print the old debug struct dump after each pretty diagnostic.
|
||||||
|
const ALSO_PRINT_DEBUG_AFTER_PRETTY: bool = true;
|
||||||
|
|
||||||
|
// Cargo.toml additions:
|
||||||
|
// is-terminal = "0.4"
|
||||||
|
// sysinfo = { version = "0.30", features = ["multithread"] }
|
||||||
|
// walkdir = "2"
|
||||||
|
// chardet = "0.2"
|
||||||
|
// encoding_rs = "0.8"
|
||||||
|
|
||||||
|
// Linux-only accurate RSS in MB. Fallback uses sysinfo.
|
||||||
|
fn rss_mb() -> u64 {
|
||||||
|
#[cfg(target_os = "linux")]
|
||||||
|
{
|
||||||
|
use std::io::Read;
|
||||||
|
let mut s = String::new();
|
||||||
|
if let Ok(mut f) = std::fs::File::open("/proc/self/statm")
|
||||||
|
&& f.read_to_string(&mut s).is_ok()
|
||||||
|
&& let Some(rss_pages) = s
|
||||||
|
.split_whitespace()
|
||||||
|
.nth(1)
|
||||||
|
.and_then(|x| x.parse::<u64>().ok())
|
||||||
|
{
|
||||||
|
let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) as u64 };
|
||||||
|
return (rss_pages * page) / (1024 * 1024);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
use sysinfo::{System, get_current_pid};
|
||||||
|
let mut sys = System::new();
|
||||||
|
sys.refresh_processes();
|
||||||
|
let Ok(pid) = get_current_pid() else { return 0 };
|
||||||
|
sys.process(pid).map_or(0, |p| p.memory() / 1024)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn mark(label: &str, t0: Instant) {
|
||||||
|
println!(
|
||||||
|
"[{:>14}] t={:>8.2?} rss={} MB",
|
||||||
|
label,
|
||||||
|
t0.elapsed(),
|
||||||
|
rss_mb()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read `ignore.txt` next to `root` and build a canonicalized set.
|
||||||
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
|
fn load_ignore_set(root: &std::path::Path) -> HashSet<PathBuf> {
|
||||||
let ignore_file = root.join("ignore.txt");
|
let ignore_file = root.join("ignore.txt");
|
||||||
if !ignore_file.exists() {
|
if !ignore_file.exists() {
|
||||||
return HashSet::new();
|
return HashSet::new();
|
||||||
}
|
}
|
||||||
|
|
||||||
let content = match fs::read_to_string(&ignore_file) {
|
let content = match fs::read_to_string(&ignore_file) {
|
||||||
Ok(content) => content,
|
Ok(s) => s,
|
||||||
Err(error) => {
|
Err(e) => {
|
||||||
eprintln!("Could not read {}: {error}", ignore_file.display());
|
eprintln!("Could not read {}: {e}", ignore_file.display());
|
||||||
return HashSet::new();
|
return HashSet::new();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
content
|
content
|
||||||
.lines()
|
.lines()
|
||||||
.map(str::trim)
|
.map(str::trim)
|
||||||
.filter(|line| !line.is_empty() && !line.starts_with('#'))
|
.filter(|l| !l.is_empty() && !l.starts_with('#'))
|
||||||
.filter_map(|line| {
|
.filter_map(|line| {
|
||||||
let next_path = PathBuf::from(line);
|
let p = PathBuf::from(line);
|
||||||
let absolute_path = if next_path.is_absolute() {
|
let abs = if p.is_absolute() { p } else { root.join(p) };
|
||||||
next_path
|
fs::canonicalize(abs).ok()
|
||||||
} else {
|
|
||||||
root.join(next_path)
|
|
||||||
};
|
|
||||||
fs::canonicalize(absolute_path).ok()
|
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// CLI: `verify_uc <root_dir>` - find all `.uc` files in the provided directory
|
/// Wait for Enter if running in a TTY, shown before printing errors.
|
||||||
/// (except those listed in `ignore.txt` in the root) and test them all.
|
fn wait_before_errors(msg: &str) {
|
||||||
///
|
let _ = io::stdout().flush();
|
||||||
/// Reported execution time is the tokenization time, without considering time
|
if is_terminal::is_terminal(io::stdin()) {
|
||||||
/// it takes to read files from disk.
|
eprint!("{msg}");
|
||||||
///
|
let _ = io::stderr().flush();
|
||||||
/// `ignore.txt` is for listing specific files, not directories.
|
let mut s = String::new();
|
||||||
fn main() {
|
let _ = io::stdin().read_line(&mut s);
|
||||||
let root_dir = std::env::args().nth(1).unwrap(); // it is fine to crash debug utility
|
}
|
||||||
let root = PathBuf::from(&root_dir);
|
}
|
||||||
|
|
||||||
|
/// CLI: `verify_uc <root_dir> [file_name]`
|
||||||
|
///
|
||||||
|
fn main() {
|
||||||
|
let mut args = std::env::args().skip(1);
|
||||||
|
let root_dir = args.next().unwrap_or_else(|| {
|
||||||
|
eprintln!("Usage: verify_uc <root_dir> [file_name]");
|
||||||
|
std::process::exit(1);
|
||||||
|
});
|
||||||
|
|
||||||
|
let target_raw = args.next(); // optional file name hint
|
||||||
|
let target_ci = target_raw.as_ref().map(|s| s.to_ascii_lowercase());
|
||||||
|
let single_mode = target_ci.is_some();
|
||||||
|
|
||||||
|
let root = PathBuf::from(&root_dir);
|
||||||
if !root.exists() {
|
if !root.exists() {
|
||||||
eprintln!("Root directory '{root_dir}' does not exist.");
|
eprintln!("Root directory '{root_dir}' does not exist.");
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load files
|
let t0 = Instant::now();
|
||||||
let ignored_paths = load_ignore_set(&root);
|
mark("baseline", t0);
|
||||||
|
|
||||||
|
// Stage 0: discover + read, bounded by FILE_LIMIT or first match in single_mode
|
||||||
|
let ignored = load_ignore_set(&root);
|
||||||
let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
|
let mut uc_files: Vec<(PathBuf, String)> = Vec::new();
|
||||||
|
let mut seen = 0usize;
|
||||||
|
let mut picked_any = false;
|
||||||
|
|
||||||
for entry in walkdir::WalkDir::new(&root)
|
for entry in walkdir::WalkDir::new(&root)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.filter_map(Result::ok) // for debug tool this is ok
|
.filter_map(Result::ok)
|
||||||
.filter(|entry| {
|
.filter(|e| {
|
||||||
let path = entry.path();
|
let path = e.path();
|
||||||
// Skip anything explicitly ignored
|
if let Ok(abs) = fs::canonicalize(path)
|
||||||
if let Ok(absolute_path) = fs::canonicalize(path) {
|
&& ignored.contains(&abs)
|
||||||
if ignored_paths.contains(&absolute_path) {
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
// Must be *.uc
|
|
||||||
path.is_file()
|
path.is_file()
|
||||||
&& path
|
&& path
|
||||||
.extension()
|
.extension()
|
||||||
.and_then(|extension| extension.to_str())
|
.and_then(|e| e.to_str())
|
||||||
.is_some_and(|extension| extension.eq_ignore_ascii_case("uc"))
|
.is_some_and(|e| e.eq_ignore_ascii_case("uc"))
|
||||||
})
|
})
|
||||||
{
|
{
|
||||||
|
if !single_mode && seen >= FILE_LIMIT {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If in single-file mode, keep only the first whose file name matches.
|
||||||
|
if let Some(needle) = target_ci.as_deref() {
|
||||||
|
let fname = entry
|
||||||
|
.path()
|
||||||
|
.file_name()
|
||||||
|
.and_then(|s| s.to_str())
|
||||||
|
.unwrap_or("");
|
||||||
|
let fname_lc = fname.to_ascii_lowercase();
|
||||||
|
if !(fname_lc == needle || fname_lc.contains(needle)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
seen += 1;
|
||||||
|
|
||||||
let path = entry.path();
|
let path = entry.path();
|
||||||
match fs::read(path) {
|
match fs::read(path) {
|
||||||
Ok(raw_bytes) => {
|
Ok(raw) => {
|
||||||
// Auto-detect encoding for old Unreal script sources
|
let (label, _, _) = chardet::detect(&raw);
|
||||||
let (encoding_label, _, _) = chardet::detect(&raw_bytes);
|
let enc = Encoding::for_label(label.as_bytes()).unwrap_or(encoding_rs::UTF_8);
|
||||||
let encoding = encoding_rs::Encoding::for_label(encoding_label.as_bytes())
|
let (txt, _, _) = enc.decode(&raw);
|
||||||
.unwrap_or(encoding_rs::UTF_8);
|
uc_files.push((path.to_path_buf(), txt.into_owned()));
|
||||||
let (decoded_text, _, _) = encoding.decode(&raw_bytes);
|
picked_any = true;
|
||||||
uc_files.push((path.to_path_buf(), decoded_text.into_owned()));
|
if single_mode {
|
||||||
|
// Only the first match.
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
Err(error) => {
|
}
|
||||||
eprintln!("Failed to read `{}`: {error}", path.display());
|
Err(e) => {
|
||||||
|
wait_before_errors("Read error detected. Press Enter to print details...");
|
||||||
|
eprintln!("Failed to read `{}`: {e}", path.display());
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
println!("Loaded {} .uc files into memory.", uc_files.len());
|
|
||||||
|
|
||||||
// Tokenize and measure performance
|
if single_mode && !picked_any {
|
||||||
let start_time = std::time::Instant::now();
|
let needle = target_raw.as_deref().unwrap();
|
||||||
let tokenized_files: Vec<(PathBuf, TokenizedFile)> = uc_files
|
eprintln!(
|
||||||
.iter()
|
"No .uc file matching '{needle}' found under '{}'.",
|
||||||
.map(|(path, source_code)| {
|
root.display()
|
||||||
let tokenized_file = TokenizedFile::from_str(source_code);
|
);
|
||||||
if tokenized_file.has_errors() {
|
std::process::exit(1);
|
||||||
println!("TK: {}", path.display());
|
}
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Loaded {} .uc files into memory (cap={}, reached={}).",
|
||||||
|
uc_files.len(),
|
||||||
|
FILE_LIMIT,
|
||||||
|
if !single_mode && uc_files.len() >= FILE_LIMIT {
|
||||||
|
"yes"
|
||||||
|
} else {
|
||||||
|
"no"
|
||||||
|
}
|
||||||
|
);
|
||||||
|
mark("after_read", t0);
|
||||||
|
|
||||||
|
// Stage 1: tokenize all
|
||||||
|
let t_tok = Instant::now();
|
||||||
|
let mut tokenized: Vec<(PathBuf, TokenizedFile)> = Vec::with_capacity(uc_files.len());
|
||||||
|
let mut tk_error_idx: Option<usize> = None;
|
||||||
|
|
||||||
|
for (i, (path, source)) in uc_files.iter().enumerate() {
|
||||||
|
let tf = TokenizedFile::tokenize(source);
|
||||||
|
if tk_error_idx.is_none() && tf.has_errors() {
|
||||||
|
tk_error_idx = Some(i);
|
||||||
|
}
|
||||||
|
tokenized.push((path.clone(), tf));
|
||||||
}
|
}
|
||||||
(path.clone(), tokenized_file)
|
|
||||||
})
|
|
||||||
.collect();
|
|
||||||
let elapsed_time = start_time.elapsed();
|
|
||||||
println!(
|
println!(
|
||||||
"Tokenized {} files in {:.2?}",
|
"Tokenized {} files in {:.2?}",
|
||||||
tokenized_files.len(),
|
tokenized.len(),
|
||||||
elapsed_time
|
t_tok.elapsed()
|
||||||
);
|
);
|
||||||
|
mark("after_tokenize", t0);
|
||||||
|
|
||||||
// Round-trip check
|
// If tokenization error: wait, dump tokens for the first failing file, then exit.
|
||||||
for ((path, original), (_, tokenized_file)) in uc_files.iter().zip(tokenized_files.iter()) {
|
if let Some(idx) = tk_error_idx {
|
||||||
let reconstructed = tokenized_file.reconstruct_source();
|
let (bad_path, _) = &tokenized[idx];
|
||||||
if original != &reconstructed {
|
wait_before_errors("Tokenization error found. Press Enter to dump tokens...");
|
||||||
eprintln!("Reconstruction mismatch in `{}`!", path.display());
|
eprintln!("--- Tokenization error in: {}", bad_path.display());
|
||||||
|
//bad_tf.dump_debug_layout(); // from DebugTools
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stage 2: parse all with ONE arena kept alive
|
||||||
|
let arena = rottlib::arena::Arena::new();
|
||||||
|
let t_parse = Instant::now();
|
||||||
|
|
||||||
|
// First failing parse: (tokenized_index, diagnostics, fatal)
|
||||||
|
let mut first_fail: Option<(usize, Vec<Diag>, Option<String>)> = None;
|
||||||
|
|
||||||
|
for (i, (path, tk)) in tokenized.iter().enumerate() {
|
||||||
|
// --- progress line BEFORE parsing this file ---
|
||||||
|
{
|
||||||
|
use std::io::Write;
|
||||||
|
eprint!(
|
||||||
|
"Parsing [{}/{}] {} | rss={} MB\r\n",
|
||||||
|
i + 1,
|
||||||
|
tokenized.len(),
|
||||||
|
path.display(),
|
||||||
|
rss_mb()
|
||||||
|
);
|
||||||
|
let _ = io::stderr().flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("All .uc files matched successfully.");
|
let mut parser = Parser::new(tk, &arena);
|
||||||
|
|
||||||
|
match parser.parse_source_file() {
|
||||||
|
Ok(_) => {
|
||||||
|
if !parser.diagnostics.is_empty() && first_fail.is_none() {
|
||||||
|
first_fail = Some((i, parser.diagnostics.clone(), None));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => {
|
||||||
|
if first_fail.is_none() {
|
||||||
|
first_fail = Some((i, parser.diagnostics.clone(), Some(format!("{e:?}"))));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Parsed {} files in {:.2?}",
|
||||||
|
tokenized.len(),
|
||||||
|
t_parse.elapsed()
|
||||||
|
);
|
||||||
|
mark("after_parse", t0);
|
||||||
|
|
||||||
|
// Summary
|
||||||
|
println!("--- Summary ---");
|
||||||
|
println!("Files processed: {}", tokenized.len());
|
||||||
|
println!("File cap: {FILE_LIMIT}");
|
||||||
|
|
||||||
|
if let Some((idx, diags, fatal)) = first_fail {
|
||||||
|
wait_before_errors("Parse issues detected. Press Enter to print diagnostics...");
|
||||||
|
let (path, tf) = &tokenized[idx];
|
||||||
|
eprintln!("--- Parse issues in first failing file ---");
|
||||||
|
eprintln!("File: {}", path.display());
|
||||||
|
if let Some(f) = &fatal {
|
||||||
|
eprintln!("Fatal parse error: {f}");
|
||||||
|
}
|
||||||
|
if diags.is_empty() && fatal.is_none() {
|
||||||
|
eprintln!("(no diagnostics captured)");
|
||||||
|
} else {
|
||||||
|
let use_colors = is_terminal::is_terminal(io::stderr());
|
||||||
|
let fname = path.display().to_string();
|
||||||
|
let total = diags.len();
|
||||||
|
let first_n = DIAG_SHOW_FIRST.min(total);
|
||||||
|
let last_n = DIAG_SHOW_LAST.min(total.saturating_sub(first_n));
|
||||||
|
|
||||||
|
if total > first_n + last_n {
|
||||||
|
// first window
|
||||||
|
for (k, d) in diags.iter().take(first_n).enumerate() {
|
||||||
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||||
|
eprintln!("{s}");
|
||||||
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||||
|
eprintln!("#{}: {:#?}", k + 1, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
eprintln!("... {} diagnostics omitted ...", total - (first_n + last_n));
|
||||||
|
// last window
|
||||||
|
let start = total - last_n;
|
||||||
|
for (offset, d) in diags.iter().skip(start).enumerate() {
|
||||||
|
let idx_global = start + offset + 1;
|
||||||
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||||
|
eprintln!("{s}");
|
||||||
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||||
|
eprintln!("#{idx_global}: {d:#?}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (k, d) in diags.iter().enumerate() {
|
||||||
|
let s = pretty::render_diagnostic(d, tf, Some(&fname), use_colors);
|
||||||
|
eprintln!("{s}");
|
||||||
|
if ALSO_PRINT_DEBUG_AFTER_PRETTY {
|
||||||
|
eprintln!("#{}: {:#?}", k + 1, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("All files parsed without diagnostics.");
|
||||||
}
|
}
|
||||||
|
|||||||
85
dev_tests/src/verify_expr.rs
Normal file
85
dev_tests/src/verify_expr.rs
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
#![allow(
|
||||||
|
clippy::all,
|
||||||
|
clippy::pedantic,
|
||||||
|
clippy::nursery,
|
||||||
|
clippy::cargo,
|
||||||
|
clippy::restriction
|
||||||
|
)]
|
||||||
|
|
||||||
|
use rottlib::arena::Arena;
|
||||||
|
use rottlib::lexer::TokenizedFile;
|
||||||
|
use rottlib::parser::Parser;
|
||||||
|
|
||||||
|
mod pretty;
|
||||||
|
|
||||||
|
/// Expressions to test.
|
||||||
|
///
|
||||||
|
/// Add, remove, or edit entries here.
|
||||||
|
/// Using `(&str, &str)` gives each case a human-readable label.
|
||||||
|
const TEST_CASES: &[(&str, &str)] = &[
|
||||||
|
("simple_add", "1 + 2 * 3"),
|
||||||
|
("member_call", "Foo.Bar(1, 2)"),
|
||||||
|
("index_member", "arr[5].X"),
|
||||||
|
("tagged_name", "Class'MyPackage.MyThing'"),
|
||||||
|
("broken_expr", "a + (]\n//AAA\n//BBB\n//CCC\n//DDD\n//EEE\n//FFF"),
|
||||||
|
];
|
||||||
|
|
||||||
|
/// If true, print the parsed expression using Debug formatting.
|
||||||
|
const PRINT_PARSED_EXPR: bool = false;
|
||||||
|
|
||||||
|
/// If true, print diagnostics even when parsing returned a value.
|
||||||
|
const ALWAYS_PRINT_DIAGNOSTICS: bool = true;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let arena = Arena::new();
|
||||||
|
|
||||||
|
println!("Running {} expression test case(s)...", TEST_CASES.len());
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let mut had_any_problem = false;
|
||||||
|
|
||||||
|
for (idx, (label, source)) in TEST_CASES.iter().enumerate() {
|
||||||
|
println!("============================================================");
|
||||||
|
println!("Case #{:02}: {}", idx + 1, label);
|
||||||
|
println!("Source: {}", source);
|
||||||
|
println!("------------------------------------------------------------");
|
||||||
|
|
||||||
|
let tf = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
let mut parser = Parser::new(&tf, &arena);
|
||||||
|
let expr = parser.parse_expression();
|
||||||
|
|
||||||
|
println!("parse_expression() returned.");
|
||||||
|
|
||||||
|
if PRINT_PARSED_EXPR {
|
||||||
|
println!("Parsed expression:");
|
||||||
|
println!("{expr:#?}");
|
||||||
|
}
|
||||||
|
|
||||||
|
if parser.diagnostics.is_empty() {
|
||||||
|
println!("Diagnostics: none");
|
||||||
|
} else {
|
||||||
|
had_any_problem = true;
|
||||||
|
println!("Diagnostics: {}", parser.diagnostics.len());
|
||||||
|
|
||||||
|
if ALWAYS_PRINT_DIAGNOSTICS {
|
||||||
|
let use_colors = false;
|
||||||
|
for (k, diag) in parser.diagnostics.iter().enumerate() {
|
||||||
|
let rendered = pretty::render_diagnostic(diag, &tf, Some(label), use_colors);
|
||||||
|
println!("Diagnostic #{}:", k + 1);
|
||||||
|
println!("{rendered}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("============================================================");
|
||||||
|
if had_any_problem {
|
||||||
|
println!("Done. At least one case had tokenization or parse diagnostics.");
|
||||||
|
std::process::exit(1);
|
||||||
|
} else {
|
||||||
|
println!("Done. All cases completed without diagnostics.");
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
perf.data.old
Normal file
BIN
perf.data.old
Normal file
Binary file not shown.
@ -7,6 +7,11 @@ edition = "2024"
|
|||||||
default = []
|
default = []
|
||||||
debug = []
|
debug = []
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
logos = "0.15"
|
logos = "0.15"
|
||||||
bumpalo = { version = "3", features = ["boxed", "collections"] }
|
bumpalo = { version = "3", features = ["boxed", "collections"] }
|
||||||
|
backtrace = "0.3"
|
||||||
|
crossterm = "0.*"
|
||||||
@ -1,21 +1,26 @@
|
|||||||
//! Arena submodule defining types that exist in their own memory space and
|
//! Arena submodule defining types allocated from a dedicated bump arena,
|
||||||
//! allow multiple cheap allocations (both performance- and fragmentation-wise).
|
//! allowing many cheap allocations with fast bulk reclamation.
|
||||||
//!
|
//!
|
||||||
//! ## Memory safety
|
//! ## Destruction and resource management
|
||||||
//!
|
//!
|
||||||
//! Dropping the [`Arena`] frees all its memory at once and does not run
|
//! Dropping the [`Arena`] reclaims the arena's memory in bulk. Destructors are
|
||||||
//! [`Drop`] for values allocated within it. Avoid storing types that implement
|
//! not run for arena allocations that are still live at that point. Therefore,
|
||||||
//! [`Drop`] or own external resources inside [`ArenaNode`], [`ArenaVec`], or
|
//! avoid storing types whose cleanup must reliably happen at arena release,
|
||||||
//! [`ArenaString`]. If you must, arrange an explicit "drain/drop" pass before
|
//! especially types that own memory allocations or external resources outside
|
||||||
//! the arena is dropped.
|
//! the arena.
|
||||||
|
//!
|
||||||
|
//! [`ArenaNode`], [`ArenaVec`], and [`ArenaString`] are provided so commonly
|
||||||
|
//! used owned data can keep their storage inside the arena rather than in
|
||||||
|
//! separate global-heap allocations.
|
||||||
|
|
||||||
|
use core::borrow::Borrow;
|
||||||
use core::fmt::{Debug, Display, Formatter, Result};
|
use core::fmt::{Debug, Display, Formatter, Result};
|
||||||
use core::ops::{Deref, DerefMut};
|
use core::ops::{Deref, DerefMut};
|
||||||
|
|
||||||
use bumpalo::{Bump, boxed, collections};
|
use bumpalo::{Bump, boxed, collections};
|
||||||
|
|
||||||
use crate::ast::AstSpan;
|
use crate::ast::AstSpan;
|
||||||
use crate::lexer::TokenLocation;
|
use crate::lexer::TokenPosition;
|
||||||
|
|
||||||
/// Object that manages a separate memory space, which can be deallocated all
|
/// Object that manages a separate memory space, which can be deallocated all
|
||||||
/// at once after use.
|
/// at once after use.
|
||||||
@ -23,11 +28,8 @@ use crate::lexer::TokenLocation;
|
|||||||
/// All allocations borrow the arena immutably.
|
/// All allocations borrow the arena immutably.
|
||||||
///
|
///
|
||||||
/// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it
|
/// Dropping the [`Arena`] does not run [`Drop`] for values allocated within it
|
||||||
/// (including values contained in [`ArenaNode`], [`ArenaVec`]
|
/// (including values contained in [`ArenaNode`], [`ArenaVec`] and
|
||||||
/// and [`ArenaString`]).
|
/// [`ArenaString`]).
|
||||||
///
|
|
||||||
/// This arena is not thread-safe (`!Send`, `!Sync`). Values borrow the arena
|
|
||||||
/// and therefore cannot be sent across threads independently.
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Arena {
|
pub struct Arena {
|
||||||
bump: Bump,
|
bump: Bump,
|
||||||
@ -48,38 +50,47 @@ impl Arena {
|
|||||||
ArenaVec(collections::Vec::new_in(&self.bump))
|
ArenaVec(collections::Vec::new_in(&self.bump))
|
||||||
}
|
}
|
||||||
|
|
||||||
///Allocates a copy of `string` in this arena and returns
|
/// Allocates a copy of `string` in this arena and returns
|
||||||
/// an [`ArenaString`].
|
/// an [`ArenaString`].
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn string(&self, string: &str) -> ArenaString<'_> {
|
pub fn string(&self, string: &str) -> ArenaString<'_> {
|
||||||
ArenaString(collections::String::from_str_in(string, &self.bump))
|
ArenaString(collections::String::from_str_in(string, &self.bump))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Allocates `value` in this arena with the given `span`,
|
/// Allocates `value` in this arena and attaches `span`.
|
||||||
/// returning an [`ArenaNode`].
|
|
||||||
///
|
///
|
||||||
/// The node's storage borrows this arena and cannot outlive it.
|
/// The returned node borrows the arena and cannot outlive it.
|
||||||
///
|
/// If it is still live when the arena is dropped, its destructor is not run.
|
||||||
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn alloc<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> {
|
pub fn alloc_node<T>(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> {
|
||||||
ArenaNode {
|
ArenaNode {
|
||||||
inner: boxed::Box::new_in(value, &self.bump),
|
value: boxed::Box::new_in(value, &self.bump),
|
||||||
span,
|
span,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn alloc_between<T>(
|
/// Allocates `value` in this arena and attaches the span from `start` to
|
||||||
|
/// `end`.
|
||||||
|
///
|
||||||
|
/// The returned node borrows the arena and cannot outlive it.
|
||||||
|
/// If it is still live when the arena is dropped, its destructor is not run.
|
||||||
|
#[must_use]
|
||||||
|
pub fn alloc_node_between<T>(
|
||||||
&self,
|
&self,
|
||||||
value: T,
|
value: T,
|
||||||
from: TokenLocation,
|
start: TokenPosition,
|
||||||
to: TokenLocation,
|
end: TokenPosition,
|
||||||
) -> ArenaNode<'_, T> {
|
) -> ArenaNode<'_, T> {
|
||||||
self.alloc(value, AstSpan { from, to })
|
self.alloc_node(value, AstSpan::range(start, end))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn alloc_at<T>(&self, value: T, at: TokenLocation) -> ArenaNode<'_, T> {
|
/// Allocates `value` in this arena and attaches a span covering `at`.
|
||||||
self.alloc(value, AstSpan { from: at, to: at })
|
///
|
||||||
|
/// The returned node borrows the arena and cannot outlive it.
|
||||||
|
/// If it is still live when the arena is dropped, its destructor is not run.
|
||||||
|
#[must_use]
|
||||||
|
pub fn alloc_node_at<T>(&self, value: T, at: TokenPosition) -> ArenaNode<'_, T> {
|
||||||
|
self.alloc_node(value, AstSpan::new(at))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -91,15 +102,11 @@ impl Default for Arena {
|
|||||||
|
|
||||||
/// An arena-allocated box with an attached source span.
|
/// An arena-allocated box with an attached source span.
|
||||||
///
|
///
|
||||||
/// Equality and hashing take into account both the contained `T` and the `span`
|
/// Dropping the node normally runs `Drop` for the inner value.
|
||||||
/// (when `T: Eq + Hash`).
|
/// Dropping the arena does not itself perform a separate destructor pass.
|
||||||
///
|
|
||||||
/// Note: `T`'s [`Drop`] is not run when the arena is dropped.
|
|
||||||
#[derive(Hash, PartialEq, Eq)]
|
#[derive(Hash, PartialEq, Eq)]
|
||||||
pub struct ArenaNode<'arena, T> {
|
pub struct ArenaNode<'arena, T> {
|
||||||
/// Value allocated in the arena; this node owns it.
|
value: boxed::Box<'arena, T>,
|
||||||
inner: boxed::Box<'arena, T>,
|
|
||||||
/// Token range covered by the value.
|
|
||||||
span: AstSpan,
|
span: AstSpan,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -108,74 +115,53 @@ impl<'arena, T> ArenaNode<'arena, T> {
|
|||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self {
|
pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self {
|
||||||
Self {
|
Self {
|
||||||
inner: boxed::Box::new_in(value, &arena.bump),
|
value: boxed::Box::new_in(value, &arena.bump),
|
||||||
span,
|
span,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Creates a new [`ArenaNode`] for an AST node that spans a single token.
|
/// Returns a mutable reference to the token span covered by this node.
|
||||||
pub fn from_token_location(
|
#[must_use]
|
||||||
value: T,
|
pub const fn span_mut(&mut self) -> &mut AstSpan {
|
||||||
token_location: crate::lexer::TokenLocation,
|
|
||||||
arena: &'arena Arena,
|
|
||||||
) -> Self {
|
|
||||||
Self {
|
|
||||||
inner: boxed::Box::new_in(value, &arena.bump),
|
|
||||||
span: AstSpan {
|
|
||||||
from: token_location,
|
|
||||||
to: token_location,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn span_mut(&mut self) -> &mut AstSpan {
|
|
||||||
&mut self.span
|
&mut self.span
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn extend_to(&mut self, to: TokenLocation) {
|
|
||||||
self.span.to = to;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn extend_from(&mut self, from: TokenLocation) {
|
|
||||||
self.span.from = from;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the token span covered by this node.
|
/// Returns the token span covered by this node.
|
||||||
pub fn span(&self) -> &AstSpan {
|
#[must_use]
|
||||||
|
pub const fn span(&self) -> &AstSpan {
|
||||||
&self.span
|
&self.span
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, T> Deref for ArenaNode<'arena, T> {
|
impl<T> Deref for ArenaNode<'_, T> {
|
||||||
type Target = T;
|
type Target = T;
|
||||||
|
|
||||||
fn deref(&self) -> &T {
|
fn deref(&self) -> &T {
|
||||||
&self.inner
|
&self.value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, T> DerefMut for ArenaNode<'arena, T> {
|
impl<T> DerefMut for ArenaNode<'_, T> {
|
||||||
fn deref_mut(&mut self) -> &mut T {
|
fn deref_mut(&mut self) -> &mut T {
|
||||||
&mut self.inner
|
&mut self.value
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, T: Debug> Debug for ArenaNode<'arena, T> {
|
impl<T: Debug> Debug for ArenaNode<'_, T> {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
f.debug_struct("ArenaNode")
|
f.debug_struct("ArenaNode")
|
||||||
.field("inner", &**self)
|
.field("inner", &**self)
|
||||||
.field("span", &self.span())
|
.field("span", self.span())
|
||||||
.finish()
|
.finish()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Version of [`Vec`] that can be safely used inside a memory arena.
|
/// Version of [`Vec`] whose backing storage lives in the arena.
|
||||||
///
|
///
|
||||||
/// Elements do not have their destructors run when the arena is dropped.
|
/// Elements are dropped when the `ArenaVec` itself is dropped normally.
|
||||||
///
|
/// Capacity growth may leave old buffers in the arena until the whole arena
|
||||||
/// This type dereferences to `[T]` and supports iteration by reference
|
/// is reclaimed.
|
||||||
/// (`&ArenaVec` and `&mut ArenaVec` implement [`IntoIterator`]).
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
|
||||||
pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>);
|
pub struct ArenaVec<'arena, T>(collections::Vec<'arena, T>);
|
||||||
|
|
||||||
impl<'arena, T> ArenaVec<'arena, T> {
|
impl<'arena, T> ArenaVec<'arena, T> {
|
||||||
@ -190,18 +176,28 @@ impl<'arena, T> ArenaVec<'arena, T> {
|
|||||||
/// Growth is backed by the arena; increasing capacity allocates new space
|
/// Growth is backed by the arena; increasing capacity allocates new space
|
||||||
/// in the arena and never frees previous blocks.
|
/// in the arena and never frees previous blocks.
|
||||||
pub fn push(&mut self, value: T) {
|
pub fn push(&mut self, value: T) {
|
||||||
self.0.push(value)
|
self.0.push(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reserves capacity for at least `additional` more elements.
|
||||||
|
///
|
||||||
|
/// The collection may reserve more space to avoid frequent reallocations.
|
||||||
|
/// If growth requires a new allocation, the previous buffer remains in the
|
||||||
|
/// arena until the arena is reclaimed.
|
||||||
pub fn reserve(&mut self, additional: usize) {
|
pub fn reserve(&mut self, additional: usize) {
|
||||||
self.0.reserve(additional)
|
self.0.reserve(additional);
|
||||||
}
|
}
|
||||||
pub fn extend<I: IntoIterator<Item = T>>(&mut self, it: I) {
|
|
||||||
self.0.extend(it)
|
/// Extends the vector with the contents of `items`.
|
||||||
|
///
|
||||||
|
/// Growth may allocate a new buffer in the arena and leave the previous
|
||||||
|
/// buffer in place until the arena is reclaimed.
|
||||||
|
pub fn extend<I: IntoIterator<Item = T>>(&mut self, items: I) {
|
||||||
|
self.0.extend(items);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, T> Deref for ArenaVec<'arena, T> {
|
impl<T> Deref for ArenaVec<'_, T> {
|
||||||
type Target = [T];
|
type Target = [T];
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
@ -209,48 +205,41 @@ impl<'arena, T> Deref for ArenaVec<'arena, T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, T> DerefMut for ArenaVec<'arena, T> {
|
impl<T> DerefMut for ArenaVec<'_, T> {
|
||||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||||
&mut self.0
|
&mut self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, 's, T> IntoIterator for &'s ArenaVec<'arena, T> {
|
impl<'iter, T> IntoIterator for &'iter ArenaVec<'_, T> {
|
||||||
type Item = &'s T;
|
type Item = &'iter T;
|
||||||
type IntoIter = core::slice::Iter<'s, T>;
|
type IntoIter = core::slice::Iter<'iter, T>;
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
self.0.iter()
|
self.0.iter()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena, 's, T> IntoIterator for &'s mut ArenaVec<'arena, T> {
|
impl<'iter, T> IntoIterator for &'iter mut ArenaVec<'_, T> {
|
||||||
type Item = &'s mut T;
|
type Item = &'iter mut T;
|
||||||
type IntoIter = core::slice::IterMut<'s, T>;
|
type IntoIter = core::slice::IterMut<'iter, T>;
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
self.0.iter_mut()
|
self.0.iter_mut()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Version of [`String`] that can be safely used inside a memory arena.
|
/// Version of [`String`] whose backing storage lives in the arena.
|
||||||
///
|
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
/// This type dereferences to [`str`] and implements [`AsRef<str>`] and
|
|
||||||
/// [`core::borrow::Borrow<str>`] for ergonomic use with APIs expecting string
|
|
||||||
/// slices.
|
|
||||||
///
|
|
||||||
/// The string borrows the arena and cannot outlive it. Dropping the arena
|
|
||||||
/// frees its memory without running `Drop` for the string contents.
|
|
||||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
|
||||||
pub struct ArenaString<'arena>(collections::String<'arena>);
|
pub struct ArenaString<'arena>(collections::String<'arena>);
|
||||||
|
|
||||||
impl<'arena> ArenaString<'arena> {
|
impl<'arena> ArenaString<'arena> {
|
||||||
/// Allocates a copy of `string` in `arena` and returns an [`ArenaString`].
|
/// Allocates a copy of `string` in `arena` and returns an [`ArenaString`].
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn from_str_in(string: &str, arena: &'arena Arena) -> Self {
|
pub fn from_str_in(text: &str, arena: &'arena Arena) -> Self {
|
||||||
Self(collections::String::from_str_in(string, &arena.bump))
|
Self(collections::String::from_str_in(text, &arena.bump))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena> Deref for ArenaString<'arena> {
|
impl Deref for ArenaString<'_> {
|
||||||
type Target = str;
|
type Target = str;
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
@ -258,19 +247,19 @@ impl<'arena> Deref for ArenaString<'arena> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena> AsRef<str> for ArenaString<'arena> {
|
impl AsRef<str> for ArenaString<'_> {
|
||||||
fn as_ref(&self) -> &str {
|
fn as_ref(&self) -> &str {
|
||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena> core::borrow::Borrow<str> for ArenaString<'arena> {
|
impl Borrow<str> for ArenaString<'_> {
|
||||||
fn borrow(&self) -> &str {
|
fn borrow(&self) -> &str {
|
||||||
&self.0
|
&self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'arena> Display for ArenaString<'arena> {
|
impl Display for ArenaString<'_> {
|
||||||
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result {
|
||||||
Display::fmt(&self.0, f)
|
Display::fmt(&self.0, f)
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,387 +0,0 @@
|
|||||||
use crate::arena::ArenaVec;
|
|
||||||
|
|
||||||
use super::lexer::TokenLocation;
|
|
||||||
|
|
||||||
use core::fmt;
|
|
||||||
|
|
||||||
use crate::arena::{Arena, ArenaNode, ArenaString};
|
|
||||||
|
|
||||||
// All inclusive!
|
|
||||||
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
|
||||||
pub struct AstSpan {
|
|
||||||
pub from: TokenLocation,
|
|
||||||
pub to: TokenLocation,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl AstSpan {
|
|
||||||
pub fn merge(left_span: &AstSpan, right_span: &AstSpan) -> AstSpan {
|
|
||||||
AstSpan {
|
|
||||||
from: left_span.from,
|
|
||||||
to: right_span.to,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn new(single_location: TokenLocation) -> AstSpan {
|
|
||||||
AstSpan {
|
|
||||||
from: single_location,
|
|
||||||
to: single_location,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn range(from: TokenLocation, to: TokenLocation) -> AstSpan {
|
|
||||||
AstSpan { from, to }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn extend_to(&mut self, right_most_location: TokenLocation) {
|
|
||||||
if right_most_location > self.to {
|
|
||||||
self.to = right_most_location
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
|
||||||
pub enum PrefixOperator {
|
|
||||||
Not,
|
|
||||||
Minus,
|
|
||||||
BitwiseNot,
|
|
||||||
Increment,
|
|
||||||
Decrement,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
|
||||||
pub enum PostfixOperator {
|
|
||||||
Increment,
|
|
||||||
Decrement,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
|
||||||
pub enum InfixOperator {
|
|
||||||
// Assignments
|
|
||||||
Assign,
|
|
||||||
MultiplyAssign,
|
|
||||||
DivideAssign,
|
|
||||||
ModuloAssign,
|
|
||||||
PlusAssign,
|
|
||||||
MinusAssign,
|
|
||||||
ConcatAssign,
|
|
||||||
ConcatSpaceAssign,
|
|
||||||
// String operations
|
|
||||||
ConcatSpace,
|
|
||||||
Concat,
|
|
||||||
// Logical
|
|
||||||
And,
|
|
||||||
Xor,
|
|
||||||
Or,
|
|
||||||
// Bit-wise
|
|
||||||
BitwiseAnd,
|
|
||||||
BitwiseOr,
|
|
||||||
BitwiseXor,
|
|
||||||
// Not-equal
|
|
||||||
NotEqual,
|
|
||||||
// Comparison
|
|
||||||
Equal,
|
|
||||||
ApproximatelyEqual,
|
|
||||||
Less,
|
|
||||||
LessEqual,
|
|
||||||
Greater,
|
|
||||||
GreaterEqual,
|
|
||||||
ClockwiseFrom,
|
|
||||||
// Shifts
|
|
||||||
LeftShift,
|
|
||||||
LogicalRightShift,
|
|
||||||
RightShift,
|
|
||||||
// Terms
|
|
||||||
Plus,
|
|
||||||
Minus,
|
|
||||||
// Modulo
|
|
||||||
Modulo,
|
|
||||||
// Factor
|
|
||||||
Multiply,
|
|
||||||
Divide,
|
|
||||||
Dot,
|
|
||||||
Cross,
|
|
||||||
// Exponentiation
|
|
||||||
Exponentiation,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[allow(clippy::large_enum_variant)]
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum Expression<'src, 'arena> {
|
|
||||||
Binary(
|
|
||||||
ExpressionRef<'src, 'arena>,
|
|
||||||
InfixOperator,
|
|
||||||
ExpressionRef<'src, 'arena>,
|
|
||||||
),
|
|
||||||
LeftUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
|
|
||||||
RightUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
|
|
||||||
|
|
||||||
Identifier(&'src str),
|
|
||||||
String(ArenaString<'arena>),
|
|
||||||
Integer(i128),
|
|
||||||
Float(f64),
|
|
||||||
|
|
||||||
Bool(bool),
|
|
||||||
None,
|
|
||||||
Parentheses(ExpressionRef<'src, 'arena>),
|
|
||||||
|
|
||||||
Block {
|
|
||||||
// All these end with `;`
|
|
||||||
statements: ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
|
||||||
// Last statement, but only if it doesn't end with `;`
|
|
||||||
tail: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
},
|
|
||||||
If {
|
|
||||||
condition: ExpressionRef<'src, 'arena>,
|
|
||||||
body: ExpressionRef<'src, 'arena>,
|
|
||||||
else_body: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
},
|
|
||||||
While {
|
|
||||||
condition: ExpressionRef<'src, 'arena>,
|
|
||||||
body: ExpressionRef<'src, 'arena>,
|
|
||||||
},
|
|
||||||
DoUntil {
|
|
||||||
condition: ExpressionRef<'src, 'arena>,
|
|
||||||
body: ExpressionRef<'src, 'arena>,
|
|
||||||
},
|
|
||||||
ForEach {
|
|
||||||
iterator: ExpressionRef<'src, 'arena>,
|
|
||||||
body: ExpressionRef<'src, 'arena>,
|
|
||||||
},
|
|
||||||
For {
|
|
||||||
init: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
condition: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
step: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
body: ExpressionRef<'src, 'arena>,
|
|
||||||
},
|
|
||||||
Switch {
|
|
||||||
selector: ExpressionRef<'src, 'arena>,
|
|
||||||
cases: ArenaVec<'arena, CaseRef<'src, 'arena>>,
|
|
||||||
// default case
|
|
||||||
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
|
|
||||||
// last statement of the case block
|
|
||||||
tail: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
},
|
|
||||||
Goto(ArenaString<'arena>),
|
|
||||||
Continue,
|
|
||||||
Break(Option<ExpressionRef<'src, 'arena>>),
|
|
||||||
Return(Option<ExpressionRef<'src, 'arena>>),
|
|
||||||
// For injecting in place of parts that couldn't be parsed
|
|
||||||
// (along with text that wasn't able to be parsed)
|
|
||||||
Error,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct VariableDeclarator<'src, 'arena> {
|
|
||||||
pub name: ArenaString<'arena>,
|
|
||||||
pub initializer: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct SwitchCase<'src, 'arena> {
|
|
||||||
pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>, // UScript allows expressions; multiple labels ok
|
|
||||||
pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, // allow fallthrough unless a Break/Goto ends it
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type CaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum Statement<'src, 'arena> {
|
|
||||||
// For the cases where user just used too many semi-colons `;;;;`
|
|
||||||
Empty,
|
|
||||||
Expression(ExpressionRef<'src, 'arena>),
|
|
||||||
// Just declarations without assignment:
|
|
||||||
// `local int i, j, k`
|
|
||||||
LocalVariableDeclaration {
|
|
||||||
type_name: ArenaString<'arena>,
|
|
||||||
identifiers: ArenaVec<'arena, ArenaString<'arena>>,
|
|
||||||
},
|
|
||||||
// Just `int i, j = 3, k = 0`
|
|
||||||
VariableDeclaration {
|
|
||||||
type_name: ArenaString<'arena>,
|
|
||||||
declarations: ArenaVec<'arena, VariableDeclarator<'src, 'arena>>,
|
|
||||||
},
|
|
||||||
Label(ArenaString<'arena>),
|
|
||||||
// For injecting in place of parts that couldn't be parsed
|
|
||||||
// (along with text that wasn't able to be parsed)
|
|
||||||
Error,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
|
|
||||||
|
|
||||||
impl<'src, 'arena> Expression<'src, 'arena> {
|
|
||||||
pub fn new_prefix(
|
|
||||||
arena: &'arena Arena,
|
|
||||||
op_position: TokenLocation,
|
|
||||||
op: PrefixOperator,
|
|
||||||
rhs: ArenaNode<'arena, Self>,
|
|
||||||
) -> ArenaNode<'arena, Self> {
|
|
||||||
let span = AstSpan {
|
|
||||||
from: op_position,
|
|
||||||
to: rhs.span().to,
|
|
||||||
};
|
|
||||||
ArenaNode::new_in(Self::LeftUnary(op, rhs), span, arena)
|
|
||||||
}
|
|
||||||
pub fn new_postfix(
|
|
||||||
arena: &'arena Arena,
|
|
||||||
lhs: ArenaNode<'arena, Self>,
|
|
||||||
op: PostfixOperator,
|
|
||||||
op_position: TokenLocation,
|
|
||||||
) -> ArenaNode<'arena, Self> {
|
|
||||||
let span = AstSpan {
|
|
||||||
from: lhs.span().from,
|
|
||||||
to: op_position,
|
|
||||||
};
|
|
||||||
ArenaNode::new_in(Self::RightUnary(lhs, op), span, arena)
|
|
||||||
}
|
|
||||||
pub fn new_binary(
|
|
||||||
arena: &'arena Arena,
|
|
||||||
lhs: ArenaNode<'arena, Self>,
|
|
||||||
op: InfixOperator,
|
|
||||||
rhs: ArenaNode<'arena, Self>,
|
|
||||||
) -> ArenaNode<'arena, Self> {
|
|
||||||
let span = AstSpan::merge(&lhs.span(), &rhs.span());
|
|
||||||
ArenaNode::new_in(Self::Binary(lhs, op, rhs), span, arena)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub enum DeclarationLiteral<'src, 'arena> {
|
|
||||||
None,
|
|
||||||
Bool(bool),
|
|
||||||
Integer(i128),
|
|
||||||
Float(f64),
|
|
||||||
String(ArenaString<'arena>),
|
|
||||||
Identifier(&'src str),
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type DeclarationLiteralRef<'src, 'arena> = (DeclarationLiteral<'src, 'arena>, TokenLocation);
|
|
||||||
|
|
||||||
/// Returns `true` for expressions that require `;` when used as a statement
|
|
||||||
/// (i.e., everything except blocky control-flow forms).
|
|
||||||
pub trait NeedsSemi {
|
|
||||||
fn needs_semicolon(&self) -> bool;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src, 'arena> NeedsSemi for Expression<'src, 'arena> {
|
|
||||||
#[inline]
|
|
||||||
fn needs_semicolon(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
Expression::Block { .. }
|
|
||||||
| Expression::If { .. }
|
|
||||||
| Expression::While { .. }
|
|
||||||
| Expression::DoUntil { .. }
|
|
||||||
| Expression::ForEach { .. }
|
|
||||||
| Expression::For { .. }
|
|
||||||
| Expression::Error => false,
|
|
||||||
|
|
||||||
// All other expressions require `;` when used as a statement.
|
|
||||||
_ => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If `ArenaNode<T>` derefs to `T`, this works as-is.
|
|
||||||
// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`.
|
|
||||||
impl<'src, 'arena> NeedsSemi for ExpressionRef<'src, 'arena> {
|
|
||||||
#[inline]
|
|
||||||
fn needs_semicolon(&self) -> bool {
|
|
||||||
(**self).needs_semicolon()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src, 'arena> NeedsSemi for Statement<'src, 'arena> {
|
|
||||||
#[inline]
|
|
||||||
fn needs_semicolon(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
Statement::Empty | Statement::Label { .. } | Statement::Error { .. } => false,
|
|
||||||
// All other expressions require `;` when used as a statement.
|
|
||||||
_ => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// If `ArenaNode<T>` derefs to `T`, this works as-is.
|
|
||||||
// Otherwise, replace `(**self)` with your accessor, e.g. `self.value()` or `self.get()`.
|
|
||||||
impl<'src, 'arena> NeedsSemi for StatementRef<'src, 'arena> {
|
|
||||||
#[inline]
|
|
||||||
fn needs_semicolon(&self) -> bool {
|
|
||||||
(**self).needs_semicolon()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl fmt::Display for PrefixOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
let s = match self {
|
|
||||||
PrefixOperator::Not => "!",
|
|
||||||
PrefixOperator::Minus => "-",
|
|
||||||
PrefixOperator::BitwiseNot => "~",
|
|
||||||
PrefixOperator::Increment => "++.",
|
|
||||||
PrefixOperator::Decrement => "--.",
|
|
||||||
};
|
|
||||||
write!(f, "{s}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl fmt::Display for PostfixOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
let s = match self {
|
|
||||||
PostfixOperator::Increment => ".++",
|
|
||||||
PostfixOperator::Decrement => ".--",
|
|
||||||
};
|
|
||||||
write!(f, "{s}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl fmt::Display for InfixOperator {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
use InfixOperator::*;
|
|
||||||
let s = match self {
|
|
||||||
// Assignments
|
|
||||||
Assign => "=",
|
|
||||||
MultiplyAssign => "*=",
|
|
||||||
DivideAssign => "/=",
|
|
||||||
ModuloAssign => "%=",
|
|
||||||
PlusAssign => "+=",
|
|
||||||
MinusAssign => "-=",
|
|
||||||
ConcatAssign => "$=",
|
|
||||||
ConcatSpaceAssign => "@=",
|
|
||||||
// String operations
|
|
||||||
ConcatSpace => "@",
|
|
||||||
Concat => "$",
|
|
||||||
// Logical
|
|
||||||
And => "&&",
|
|
||||||
Xor => "^^",
|
|
||||||
Or => "||",
|
|
||||||
// Bitwise
|
|
||||||
BitwiseAnd => "&",
|
|
||||||
BitwiseOr => "|",
|
|
||||||
BitwiseXor => "^",
|
|
||||||
// Not equal
|
|
||||||
NotEqual => "!=",
|
|
||||||
// Comparison
|
|
||||||
Equal => "==",
|
|
||||||
ApproximatelyEqual => "~+",
|
|
||||||
Less => "<",
|
|
||||||
LessEqual => "<=",
|
|
||||||
Greater => ">",
|
|
||||||
GreaterEqual => ">=",
|
|
||||||
ClockwiseFrom => "ClockwiseFrom",
|
|
||||||
// Shift
|
|
||||||
LeftShift => "<<",
|
|
||||||
LogicalRightShift => ">>>",
|
|
||||||
RightShift => ">>",
|
|
||||||
// Term
|
|
||||||
Plus => "+",
|
|
||||||
Minus => "-",
|
|
||||||
// Modulo
|
|
||||||
Modulo => "%",
|
|
||||||
// Factor
|
|
||||||
Multiply => "*",
|
|
||||||
Divide => "/",
|
|
||||||
Dot => "Dot",
|
|
||||||
Cross => "Cross",
|
|
||||||
// Exp
|
|
||||||
Exponentiation => "**",
|
|
||||||
};
|
|
||||||
write!(f, "{s}")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
235
rottlib/src/ast/callables.rs
Normal file
235
rottlib/src/ast/callables.rs
Normal file
@ -0,0 +1,235 @@
|
|||||||
|
//! Callable-declaration AST nodes.
|
||||||
|
//!
|
||||||
|
//! This module defines function-like declarations together with their
|
||||||
|
//! parameter lists and callable modifiers.
|
||||||
|
//!
|
||||||
|
//! The language groups several callable forms under a largely shared header
|
||||||
|
//! structure, including ordinary functions, events, delegates, and operator
|
||||||
|
//! declarations. This module preserves those forms as AST nodes together with
|
||||||
|
//! source-relevant modifier and parameter information.
|
||||||
|
|
||||||
|
use super::{
|
||||||
|
AstSpan, BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName,
|
||||||
|
PrefixOperatorName, TypeSpecifierRef,
|
||||||
|
};
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::lexer::{Keyword, TokenPosition};
|
||||||
|
|
||||||
|
use crate::arena::ArenaNode;
|
||||||
|
|
||||||
|
use core::convert::TryFrom;
|
||||||
|
|
||||||
|
/// Parameter modifier kind.
|
||||||
|
///
|
||||||
|
/// These modifiers apply to a single callable parameter and are preserved in
|
||||||
|
/// source order on the parameter node.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub enum ParameterModifierKind {
|
||||||
|
Optional,
|
||||||
|
Out,
|
||||||
|
Skip,
|
||||||
|
Coerce,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parameter modifier together with the source position of its token.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub struct ParameterModifier {
|
||||||
|
pub kind: ParameterModifierKind,
|
||||||
|
pub position: TokenPosition,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One callable parameter declaration.
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct Parameter<'src, 'arena> {
|
||||||
|
/// Parameter modifiers in source order.
|
||||||
|
pub modifiers: ArenaVec<'arena, ParameterModifier>,
|
||||||
|
/// Declared parameter type.
|
||||||
|
pub type_specifier: TypeSpecifierRef<'src, 'arena>,
|
||||||
|
/// Declared parameter name.
|
||||||
|
pub name: IdentifierToken,
|
||||||
|
/// Optional array-size expression from `[expr]`.
|
||||||
|
pub array_size: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
/// Optional default-value expression after `=`.
|
||||||
|
pub default_value: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stable arena reference to a parameter node.
|
||||||
|
pub type ParameterRef<'src, 'arena> = ArenaNode<'arena, Parameter<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Syntactic callable declaration kind.
|
||||||
|
///
|
||||||
|
/// This enum distinguishes ordinary callable declarations from operator
|
||||||
|
/// declarations and preserves operator fixity / precedence where applicable.
|
||||||
|
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
|
||||||
|
pub enum CallableKind {
|
||||||
|
/// Ordinary function declaration: `function`.
|
||||||
|
Function,
|
||||||
|
/// Event declaration: `event`.
|
||||||
|
Event,
|
||||||
|
/// Delegate declaration: `delegate`.
|
||||||
|
Delegate,
|
||||||
|
/// Prefix operator declaration: `preoperator`.
|
||||||
|
PrefixOperator,
|
||||||
|
/// Infix operator declaration: `operator(<precedence>)`.
|
||||||
|
///
|
||||||
|
/// Precedence can be skipped as all supported operators already have
|
||||||
|
/// built-in precedence value that can't actually be changed in
|
||||||
|
/// `UnrealScript`. So omitting precedence when redefining operators is
|
||||||
|
/// a better approach.
|
||||||
|
InfixOperator(Option<u128>),
|
||||||
|
/// Postfix operator declaration: `postoperator`.
|
||||||
|
PostfixOperator,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Keyword> for CallableKind {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
/// Converts a keyword into a [`CallableKind`] when the callable form
|
||||||
|
/// is fully determined by the keyword alone.
|
||||||
|
///
|
||||||
|
/// Returns `Err(())` for keywords that either do not represent callable
|
||||||
|
/// declarations or require additional syntax to determine the final kind
|
||||||
|
/// (for example `operator(<precedence>)`).
|
||||||
|
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
|
||||||
|
let kind = match keyword {
|
||||||
|
Keyword::Function => Self::Function,
|
||||||
|
Keyword::Event => Self::Event,
|
||||||
|
Keyword::Delegate => Self::Delegate,
|
||||||
|
Keyword::PreOperator => Self::PrefixOperator,
|
||||||
|
Keyword::PostOperator => Self::PostfixOperator,
|
||||||
|
_ => return Err(()),
|
||||||
|
};
|
||||||
|
Ok(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, PartialEq, Eq, Hash)]
|
||||||
|
pub enum CallableName {
|
||||||
|
Identifier(IdentifierToken),
|
||||||
|
PrefixOperator(PrefixOperatorName),
|
||||||
|
InfixOperator(InfixOperatorName),
|
||||||
|
PostfixOperator(PostfixOperatorName),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Callable definition.
|
||||||
|
///
|
||||||
|
/// This node represents the common syntactic shape shared by function-like
|
||||||
|
/// declarations, including ordinary functions, events, delegates, and
|
||||||
|
/// operator forms.
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct CallableDefinition<'src, 'arena> {
|
||||||
|
/// Declared callable name.
|
||||||
|
pub name: CallableName,
|
||||||
|
/// Callable declaration form.
|
||||||
|
pub kind: CallableKind,
|
||||||
|
/// Optional return type.
|
||||||
|
///
|
||||||
|
/// Some callable forms may omit a return type entirely.
|
||||||
|
pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
|
||||||
|
/// Declaration modifiers attached to the callable header.
|
||||||
|
pub modifiers: ArenaVec<'arena, CallableModifier>,
|
||||||
|
/// Formal parameters in source order.
|
||||||
|
pub parameters: ArenaVec<'arena, ParameterRef<'src, 'arena>>,
|
||||||
|
/// Optional callable body.
|
||||||
|
///
|
||||||
|
/// `None` represents a header-only declaration terminated by `;`.
|
||||||
|
/// `Some(...)` stores the parsed block statements belonging to the body.
|
||||||
|
pub body: Option<BlockBody<'src, 'arena>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stable arena reference to a callable definition node.
|
||||||
|
pub type CallableDefinitionRef<'src, 'arena> = ArenaNode<'arena, CallableDefinition<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Callable declaration modifier kind.
|
||||||
|
///
|
||||||
|
/// These modifiers apply to the callable declaration itself rather than to an
|
||||||
|
/// individual parameter.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub enum CallableModifierKind {
|
||||||
|
Final,
|
||||||
|
/// `native` or `native(<index>)`
|
||||||
|
Native(Option<u128>),
|
||||||
|
Abstract,
|
||||||
|
Transient,
|
||||||
|
Public,
|
||||||
|
Protected,
|
||||||
|
Private,
|
||||||
|
Static,
|
||||||
|
/// `config(<name>)`
|
||||||
|
Config(IdentifierToken),
|
||||||
|
Const,
|
||||||
|
Deprecated,
|
||||||
|
NoExport,
|
||||||
|
Export,
|
||||||
|
Simulated,
|
||||||
|
Latent,
|
||||||
|
Iterator,
|
||||||
|
Singular,
|
||||||
|
Exec,
|
||||||
|
Reliable,
|
||||||
|
Unreliable,
|
||||||
|
NativeReplication,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Keyword> for CallableModifierKind {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
/// Converts a keyword into a [`CallableModifierKind`] when the modifier
|
||||||
|
/// is fully determined by the keyword alone.
|
||||||
|
///
|
||||||
|
/// Returns `Err(())` for keywords that either do not represent callable
|
||||||
|
/// modifiers or require additional syntax
|
||||||
|
/// (e.g. `native(...)`, `config(...)`).
|
||||||
|
#[allow(clippy::enum_glob_use)]
|
||||||
|
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
|
||||||
|
use CallableModifierKind::*;
|
||||||
|
|
||||||
|
let kind = match keyword {
|
||||||
|
Keyword::Final => Final,
|
||||||
|
Keyword::Abstract => Abstract,
|
||||||
|
Keyword::Transient => Transient,
|
||||||
|
Keyword::Public => Public,
|
||||||
|
Keyword::Protected => Protected,
|
||||||
|
Keyword::Private => Private,
|
||||||
|
Keyword::Static => Static,
|
||||||
|
Keyword::Const => Const,
|
||||||
|
Keyword::Deprecated => Deprecated,
|
||||||
|
Keyword::NoExport => NoExport,
|
||||||
|
Keyword::Export => Export,
|
||||||
|
Keyword::Simulated => Simulated,
|
||||||
|
Keyword::Latent => Latent,
|
||||||
|
Keyword::Iterator => Iterator,
|
||||||
|
Keyword::Singular => Singular,
|
||||||
|
Keyword::Exec => Exec,
|
||||||
|
Keyword::Reliable => Reliable,
|
||||||
|
Keyword::Unreliable => Unreliable,
|
||||||
|
Keyword::NativeReplication => NativeReplication,
|
||||||
|
_ => return Err(()),
|
||||||
|
};
|
||||||
|
Ok(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Callable modifier together with its full source span.
|
||||||
|
///
|
||||||
|
/// A modifier may occupy more than one token in source, for example when it
|
||||||
|
/// carries an argument like `native(12)` or `config(System)`.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub struct CallableModifier {
|
||||||
|
/// Modifier kind.
|
||||||
|
pub kind: CallableModifierKind,
|
||||||
|
/// Span covering the full modifier syntax.
|
||||||
|
pub span: AstSpan,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Keyword {
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_callable_modifier(self) -> bool {
|
||||||
|
matches!(self, Self::Native | Self::Config) || CallableModifierKind::try_from(self).is_ok()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_callable_kind_keyword(self) -> bool {
|
||||||
|
matches!(self, Self::Operator) || CallableKind::try_from(self).is_ok()
|
||||||
|
}
|
||||||
|
}
|
||||||
290
rottlib/src/ast/expressions.rs
Normal file
290
rottlib/src/ast/expressions.rs
Normal file
@ -0,0 +1,290 @@
|
|||||||
|
//! Expression AST nodes.
|
||||||
|
//!
|
||||||
|
//! This module defines ordinary expressions together with expression-shaped
|
||||||
|
//! control-flow and block forms parsed by the language.
|
||||||
|
use super::{
|
||||||
|
AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator,
|
||||||
|
QualifiedIdentifierRef, StatementRef,
|
||||||
|
};
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
|
||||||
|
use super::super::lexer::TokenPosition;
|
||||||
|
|
||||||
|
use crate::arena::{Arena, ArenaNode, ArenaString};
|
||||||
|
|
||||||
|
/// Expression node used for both ordinary expressions and expression-shaped
|
||||||
|
/// statement/control-flow forms.
|
||||||
|
///
|
||||||
|
/// This AST is intentionally broad: besides operators and literals, it also
|
||||||
|
/// includes blocks and control-flow constructs that syntactically occupy
|
||||||
|
/// expression parsing positions in the language.
|
||||||
|
#[allow(clippy::large_enum_variant)]
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub enum Expression<'src, 'arena> {
|
||||||
|
/// Plain identifier expression.
|
||||||
|
Identifier(IdentifierToken),
|
||||||
|
/// String literal.
|
||||||
|
///
|
||||||
|
/// The contents stored in arena memory are transformed (unescaped) version
|
||||||
|
/// of raw strings from the source.
|
||||||
|
String(ArenaString<'arena>),
|
||||||
|
/// Integer literal.
|
||||||
|
Integer(u128),
|
||||||
|
/// Floating-point literal.
|
||||||
|
Float(f64),
|
||||||
|
/// Boolean literal.
|
||||||
|
Bool(bool),
|
||||||
|
/// `None` literal / null-like language value.
|
||||||
|
None,
|
||||||
|
/// Explicit parenthesized subexpression: `(expr)`.
|
||||||
|
///
|
||||||
|
/// Parentheses are preserved as a node instead of being discarded so later
|
||||||
|
/// stages can retain grouping information for diagnostics, formatting, or
|
||||||
|
/// source-faithful reconstruction.
|
||||||
|
Parentheses(ExpressionRef<'src, 'arena>),
|
||||||
|
/// Class-type reference parsed as a qualified identifier path.
|
||||||
|
///
|
||||||
|
/// This is used for class-like type mentions that are not represented as a
|
||||||
|
/// tagged name literal.
|
||||||
|
ClassType(QualifiedIdentifierRef<'arena>),
|
||||||
|
/// Tagged or untagged quoted name literal.
|
||||||
|
///
|
||||||
|
/// Examples:
|
||||||
|
/// - `class'Foo'`
|
||||||
|
/// - `Texture'Pkg.Group.Name'`
|
||||||
|
/// - `'Pkg.Group.Name'` if the grammar permits an untagged form
|
||||||
|
///
|
||||||
|
/// `tag` stores the leading identifier token when present. `name` is the
|
||||||
|
/// raw content between quotes and is preserved exactly as written.
|
||||||
|
NameLiteral {
|
||||||
|
tag: Option<IdentifierToken>,
|
||||||
|
name: &'src str,
|
||||||
|
},
|
||||||
|
/// Indexing operation: `target[index]`.
|
||||||
|
///
|
||||||
|
/// This is produced after postfix parsing and binds tighter than any infix
|
||||||
|
/// operator.
|
||||||
|
Index {
|
||||||
|
target: ExpressionRef<'src, 'arena>,
|
||||||
|
index: ExpressionRef<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// Member access: `target.name`.
|
||||||
|
///
|
||||||
|
/// The member name is stored as a token reference rather than an owned
|
||||||
|
/// string so later stages can resolve exact spelling and source location
|
||||||
|
/// from the lexer/token stream.
|
||||||
|
Member {
|
||||||
|
target: ExpressionRef<'src, 'arena>,
|
||||||
|
name: IdentifierToken,
|
||||||
|
},
|
||||||
|
/// Call expression: `callee(arg1, arg2, ...)`.
|
||||||
|
///
|
||||||
|
/// Arguments are stored as `Option<ExpressionRef>` to preserve omitted
|
||||||
|
/// arguments in syntaxes that allow empty slots.
|
||||||
|
Call {
|
||||||
|
callee: ExpressionRef<'src, 'arena>,
|
||||||
|
arguments: ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>>,
|
||||||
|
},
|
||||||
|
/// Prefix unary operator application: `op rhs`.
|
||||||
|
PrefixUnary(PrefixOperator, ExpressionRef<'src, 'arena>),
|
||||||
|
/// Postfix unary operator application: `lhs op`.
|
||||||
|
PostfixUnary(ExpressionRef<'src, 'arena>, PostfixOperator),
|
||||||
|
/// Binary operator application: `lhs op rhs`.
|
||||||
|
Binary(
|
||||||
|
ExpressionRef<'src, 'arena>,
|
||||||
|
InfixOperator,
|
||||||
|
ExpressionRef<'src, 'arena>,
|
||||||
|
),
|
||||||
|
/// Block expression / statement block: `{ ... }`.
|
||||||
|
///
|
||||||
|
/// The contained statements are preserved in source order.
|
||||||
|
Block(StatementList<'src, 'arena>),
|
||||||
|
/// Conditional expression / statement.
|
||||||
|
///
|
||||||
|
/// Both arms use `BranchBody` so the parser can preserve legacy one-line
|
||||||
|
/// bodies, optional trailing semicolons, and recovery anchors.
|
||||||
|
If {
|
||||||
|
condition: ExpressionRef<'src, 'arena>,
|
||||||
|
body: BranchBody<'src, 'arena>,
|
||||||
|
else_body: Option<BranchBody<'src, 'arena>>,
|
||||||
|
},
|
||||||
|
/// `while (condition) body`
|
||||||
|
While {
|
||||||
|
condition: ExpressionRef<'src, 'arena>,
|
||||||
|
body: BranchBody<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// `do body until (condition)`
|
||||||
|
DoUntil {
|
||||||
|
condition: ExpressionRef<'src, 'arena>,
|
||||||
|
body: BranchBody<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// `foreach iterator body`
|
||||||
|
///
|
||||||
|
/// The iteration source / iterator expression is stored as a normal
|
||||||
|
/// expression node because the language permits nontrivial syntax there.
|
||||||
|
ForEach {
|
||||||
|
iterated_expression: ExpressionRef<'src, 'arena>,
|
||||||
|
body: BranchBody<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// Traditional three-part `for` loop.
|
||||||
|
///
|
||||||
|
/// Each header component is optional to support forms such as:
|
||||||
|
/// - `for (;;)`
|
||||||
|
/// - `for (init;;)`
|
||||||
|
/// - `for (;cond;)`
|
||||||
|
/// - `for (;;step)`
|
||||||
|
For {
|
||||||
|
initialization: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
condition: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
step: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
body: BranchBody<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// `switch` construct.
|
||||||
|
///
|
||||||
|
/// `cases` contains all explicit case arms in source order.
|
||||||
|
/// `default_arm` stores the statements of the default branch, if present.
|
||||||
|
Switch {
|
||||||
|
selector: ExpressionRef<'src, 'arena>,
|
||||||
|
cases: ArenaVec<'arena, SwitchCaseRef<'src, 'arena>>,
|
||||||
|
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
|
||||||
|
},
|
||||||
|
/// `goto` statement.
|
||||||
|
///
|
||||||
|
/// Stores the token position of the target token rather than duplicating
|
||||||
|
/// its textual representation in the AST. On successful parsing refers to
|
||||||
|
/// either identifier or name literal.
|
||||||
|
Goto(TokenPosition),
|
||||||
|
/// `continue` statement.
|
||||||
|
Continue,
|
||||||
|
/// `break` statement, optionally with an attached expression if the
|
||||||
|
/// language form allows one.
|
||||||
|
Break(Option<ExpressionRef<'src, 'arena>>),
|
||||||
|
/// `return` statement, optionally carrying a returned expression.
|
||||||
|
Return(Option<ExpressionRef<'src, 'arena>>),
|
||||||
|
/// Object construction / allocation form using the language's `new` syntax.
|
||||||
|
///
|
||||||
|
/// The first three arguments are optional positional control arguments.
|
||||||
|
/// `class_specifier` is the required class expression that identifies what
|
||||||
|
/// should be constructed.
|
||||||
|
New {
|
||||||
|
outer_argument: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
name_argument: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
flags_argument: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
class_specifier: ExpressionRef<'src, 'arena>,
|
||||||
|
},
|
||||||
|
/// Recovery placeholder inserted when an expression could not be parsed.
|
||||||
|
///
|
||||||
|
/// This allows the parser to continue building a larger AST and report more
|
||||||
|
/// than one error in a single pass.
|
||||||
|
Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Statements contained in a `{ ... }` block.
|
||||||
|
pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Statements contained in a `{ ... }` block with a span.
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct BlockBody<'src, 'arena> {
|
||||||
|
pub statements: StatementList<'src, 'arena>,
|
||||||
|
pub span: AstSpan,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stable arena reference to an expression node.
|
||||||
|
pub type ExpressionRef<'src, 'arena> = ArenaNode<'arena, Expression<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Optional expression payload used in grammar positions where an expression
|
||||||
|
/// may be omitted entirely.
|
||||||
|
pub type OptionalExpression<'src, 'arena> = Option<ExpressionRef<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Body of a control-flow branch.
|
||||||
|
///
|
||||||
|
/// Branch bodies are stored separately so constructs such as `if`, `while`,
|
||||||
|
/// and `for` can preserve both the parsed body and branch-specific source
|
||||||
|
/// details.
|
||||||
|
#[derive(Debug, PartialEq)]
|
||||||
|
pub struct BranchBody<'src, 'arena> {
|
||||||
|
/// Parsed branch payload.
|
||||||
|
///
|
||||||
|
/// This is `None` when the body is absent or could not be parsed in a
|
||||||
|
/// recoverable way.
|
||||||
|
pub expression: Option<ExpressionRef<'src, 'arena>>,
|
||||||
|
|
||||||
|
/// Optional semicolon that appears immediately after a non-block branch
|
||||||
|
/// body in legacy constructs such as `if`, `for`, `while`, etc.
|
||||||
|
///
|
||||||
|
/// This is intentionally preserved rather than normalized away so later
|
||||||
|
/// stages can diagnose or reproduce source structure more precisely.
|
||||||
|
pub semicolon_position: Option<TokenPosition>,
|
||||||
|
|
||||||
|
/// Token position that can be used as a fallback end anchor for spans and
|
||||||
|
/// diagnostics when the body itself is missing.
|
||||||
|
///
|
||||||
|
/// In malformed constructs this may be the only reliable location attached
|
||||||
|
/// to the branch.
|
||||||
|
pub end_anchor_token_position: TokenPosition,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// One `case` arm inside a `switch`.
///
/// UnrealScript-style syntax allows each arm to have multiple labels and uses
/// statement lists as bodies, with fallthrough being possible unless control
/// flow terminates explicitly.
#[derive(Debug, PartialEq)]
pub struct SwitchCase<'src, 'arena> {
    /// Case labels associated with this arm.
    ///
    /// Labels are stored as expressions because the language allows
    /// expression-valued labels rather than only simple constants.
    pub labels: ArenaVec<'arena, ExpressionRef<'src, 'arena>>,

    /// Statements belonging to the arm body.
    ///
    /// Per the fallthrough rule above, execution may continue into the next
    /// arm unless a statement here terminates control flow explicitly.
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
}

/// Stable arena reference to a `switch` case arm.
pub type SwitchCaseRef<'src, 'arena> = ArenaNode<'arena, SwitchCase<'src, 'arena>>;
|
||||||
|
|
||||||
|
impl<'arena> Expression<'_, 'arena> {
|
||||||
|
/// Construct a binary expression and assign it a span from `left_hand_side`
|
||||||
|
/// through `right_hand_side`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new_binary(
|
||||||
|
arena: &'arena Arena,
|
||||||
|
left_hand_side: ArenaNode<'arena, Self>,
|
||||||
|
op: InfixOperator,
|
||||||
|
right_hand_side: ArenaNode<'arena, Self>,
|
||||||
|
) -> ArenaNode<'arena, Self> {
|
||||||
|
let span = AstSpan::merge(left_hand_side.span(), right_hand_side.span());
|
||||||
|
ArenaNode::new_in(
|
||||||
|
Self::Binary(left_hand_side, op, right_hand_side),
|
||||||
|
span,
|
||||||
|
arena,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a prefix unary expression and assign it a span from the
|
||||||
|
/// operator token through the end of `right_hand_side`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new_prefix(
|
||||||
|
arena: &'arena Arena,
|
||||||
|
operation_position: TokenPosition,
|
||||||
|
operation: PrefixOperator,
|
||||||
|
right_hand_side: ArenaNode<'arena, Self>,
|
||||||
|
) -> ArenaNode<'arena, Self> {
|
||||||
|
let span = AstSpan::range(operation_position, right_hand_side.span().token_to);
|
||||||
|
ArenaNode::new_in(Self::PrefixUnary(operation, right_hand_side), span, arena)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Construct a postfix unary expression and assign it a span from the start
|
||||||
|
/// of `left_hand_side` through the operator token.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new_postfix(
|
||||||
|
arena: &'arena Arena,
|
||||||
|
left_hand_side: ArenaNode<'arena, Self>,
|
||||||
|
operation: PostfixOperator,
|
||||||
|
operation_position: TokenPosition,
|
||||||
|
) -> ArenaNode<'arena, Self> {
|
||||||
|
let span = AstSpan::range(left_hand_side.span().token_from, operation_position);
|
||||||
|
ArenaNode::new_in(Self::PostfixUnary(left_hand_side, operation), span, arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
343
rottlib/src/ast/mod.rs
Normal file
343
rottlib/src/ast/mod.rs
Normal file
@ -0,0 +1,343 @@
|
|||||||
|
// `;` are encoded in spans of statement nodes as very last token
|
||||||
|
// Need to do a proper check to figure out what should and shouldn't be a node
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
|
||||||
|
use super::lexer::TokenPosition;
|
||||||
|
|
||||||
|
use crate::arena::{Arena, ArenaNode, ArenaString};
|
||||||
|
|
||||||
|
pub mod callables;
|
||||||
|
pub mod expressions;
|
||||||
|
pub mod operators;
|
||||||
|
pub mod types;
|
||||||
|
|
||||||
|
pub use callables::*;
|
||||||
|
pub use expressions::*;
|
||||||
|
pub use operators::*;
|
||||||
|
pub use types::*;
|
||||||
|
|
||||||
|
// TODO(review): original note kept from the refactor — "Get rid of
// identifier field".
/// Position of a single identifier token in the source stream.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct IdentifierToken(pub TokenPosition);

/// Position of a single operator token in the source stream.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct OperatorToken(pub TokenPosition);

/// Dotted identifier path such as `Pkg.Group.Type`.
#[derive(Debug, Hash, PartialEq, Eq)]
pub struct QualifiedIdentifier<'arena> {
    /// First path segment; always present.
    pub head: IdentifierToken,
    /// Remaining segments; `None` means a single-segment identifier.
    pub tail: Option<ArenaVec<'arena, IdentifierToken>>,
}
/// Stable arena reference to a qualified identifier.
pub type QualifiedIdentifierRef<'arena> = ArenaNode<'arena, QualifiedIdentifier<'arena>>;
|
||||||
|
|
||||||
|
/// Token-indexed source span; both endpoints are inclusive.
#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
pub struct AstSpan {
    /// First token covered by the span (inclusive).
    pub token_from: TokenPosition,
    /// Last token covered by the span (inclusive).
    pub token_to: TokenPosition,
}
|
||||||
|
|
||||||
|
impl AstSpan {
|
||||||
|
// -------- existing coord-based API (unchanged externally) --------
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub const fn merge(left_span: &Self, right_span: &Self) -> Self {
|
||||||
|
Self {
|
||||||
|
// assumes both were constructed in the same style; good enough for the refactor
|
||||||
|
token_from: left_span.token_from,
|
||||||
|
token_to: right_span.token_to,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------- NEW: 4 constructors based on TokenIndex --------
|
||||||
|
|
||||||
|
/// Single-token span from an index (coords are dummy for now).
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub const fn new(single_index: TokenPosition) -> Self {
|
||||||
|
Self {
|
||||||
|
token_from: single_index,
|
||||||
|
token_to: single_index,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Span from two indices (coords are dummy for now).
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub const fn range(from: TokenPosition, to: TokenPosition) -> Self {
|
||||||
|
Self {
|
||||||
|
token_from: from,
|
||||||
|
token_to: to,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Immutable extension by index (keeps coords as-is).
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub fn extended(&self, right_most_index: TokenPosition) -> Self {
|
||||||
|
Self {
|
||||||
|
token_from: self.token_from,
|
||||||
|
token_to: std::cmp::max(self.token_to, right_most_index),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// In-place extension by index (coords unchanged).
|
||||||
|
#[inline]
|
||||||
|
pub fn extend_to(&mut self, right_most_index: TokenPosition) {
|
||||||
|
if right_most_index > self.token_to {
|
||||||
|
self.token_to = right_most_index;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'arena> QualifiedIdentifier<'arena> {
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_single(&self) -> bool {
|
||||||
|
self.tail.is_none()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[allow(clippy::len_without_is_empty)] // Suppress useless suggestion for `is_empty()`
|
||||||
|
#[must_use]
|
||||||
|
pub fn len(&self) -> usize {
|
||||||
|
1 + self.tail.as_ref().map_or(0, |v| v.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
pub const fn head(&self) -> IdentifierToken {
|
||||||
|
self.head
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Iterates all identifier segments in order without allocating.
|
||||||
|
pub fn iter(&self) -> impl Iterator<Item = IdentifierToken> + '_ {
|
||||||
|
core::iter::once(self.head).chain(self.tail.iter().flat_map(|v| v.iter().copied()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Cheap constructor from a single identifier. No Vec allocated.
|
||||||
|
pub fn from_ident(arena: &'arena Arena, id: IdentifierToken) -> QualifiedIdentifierRef<'arena> {
|
||||||
|
let span = AstSpan::new(id.0);
|
||||||
|
ArenaNode::new_in(
|
||||||
|
Self {
|
||||||
|
head: id,
|
||||||
|
tail: None,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
arena,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
/// Cheap constructor from a single identifier. No Vec allocated.
|
||||||
|
pub fn from_position(
|
||||||
|
arena: &'arena Arena,
|
||||||
|
position: TokenPosition,
|
||||||
|
) -> QualifiedIdentifierRef<'arena> {
|
||||||
|
let span = AstSpan::new(position);
|
||||||
|
ArenaNode::new_in(
|
||||||
|
Self {
|
||||||
|
head: IdentifierToken(position),
|
||||||
|
tail: None,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
arena,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single parsed statement.
///
/// Statement terminators (`;`) are not separate nodes; they are encoded as
/// the last token of the statement node's span (see the module-level note).
#[derive(Debug, PartialEq)]
pub enum Statement<'src, 'arena> {
    /// Produced when the source contains redundant semicolons (`;;;;`).
    Empty,
    /// Expression evaluated as a statement.
    Expression(ExpressionRef<'src, 'arena>),
    /// Local variable declaration without assignment:
    /// `local int i, j, k`
    LocalVariableDeclaration {
        /// Type shared by every declarator in this statement.
        type_spec: TypeSpecifierRef<'src, 'arena>,
        /// Comma-separated declarators (e.g. `i, j, k`).
        declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    },
    /// Named label; the stored string is the label's name.
    Label(ArenaString<'arena>),
    /// Nested function definitions inside blocks or states.
    Function(CallableDefinitionRef<'src, 'arena>),
    /// Placeholder injected in place of parts that couldn't be parsed.
    Error,
}
/// Stable arena reference to a statement node.
pub type StatementRef<'src, 'arena> = ArenaNode<'arena, Statement<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Literal values that may appear in declaration contexts.
#[derive(Debug)]
pub enum DeclarationLiteral<'src, 'arena> {
    /// The `none` literal.
    None,
    /// Boolean literal.
    Bool(bool),
    /// Integer literal, widened to `i128`.
    Integer(i128),
    /// Floating-point literal.
    Float(f64),
    /// String literal.
    String(ArenaString<'arena>),
    /// Bare identifier used in literal position.
    Identifier(&'src str),
    /// Tagged name literal: an identifier tag paired with quoted text.
    TaggedName {
        /// Tag identifier token.
        tag: IdentifierToken,
        /// The quoted text following the tag.
        quoted: ArenaString<'arena>,
    },
}

/// A declaration literal together with its source token position.
///
/// NOTE(review): despite the `Ref` suffix this is a plain value, not an
/// `ArenaNode` reference like the other `*Ref` aliases in this module —
/// consider renaming for consistency.
#[derive(Debug)]
pub struct DeclarationLiteralRef<'src, 'arena> {
    /// The literal value itself.
    pub literal: DeclarationLiteral<'src, 'arena>,
    /// Position of the literal's token in the source stream.
    pub position: TokenPosition,
}
|
||||||
|
|
||||||
|
impl IdentifierToken {
|
||||||
|
#[must_use]
|
||||||
|
pub const fn span(self) -> AstSpan {
|
||||||
|
AstSpan::new(self.0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Modifier that may appear in a class declaration header.
///
/// Variants are grouped the way the original author grouped them; ordering
/// within the enum carries no semantic meaning.
pub enum ClassModifier<'arena> {
    Final,
    Native,
    Abstract,
    Transient,
    Public,
    Protected,
    Private,
    Static,
    /// `config` with an optional config-file name argument.
    Config(Option<IdentifierToken>),
    NativeReplication,
    ExportStructs,
    SafeReplace,

    Const,
    Deprecated,
    NoExport,
    Export,

    Localized,
    Placeable,
    NotPlaceable,
    Instanced,
    EditConst,
    EditInline,
    EditInlineNew,
    NotEditInlineNew,
    CollapseCategories,
    DontCollapseCategories,
    /// `hidecategories(...)` with the listed category names.
    HideCategories(ArenaVec<'arena, IdentifierToken>),
    /// `showcategories(...)` with the listed category names.
    ShowCategories(ArenaVec<'arena, IdentifierToken>),
    /// `within <OuterClass>`.
    Within(IdentifierToken),
    /// `dependson(<Class>)`.
    DependsOn(IdentifierToken),
    GlobalConfig,
    PerObjectConfig,
    DynamicRecompile,
    HideDropdown,
    ParseConfig,
    CacheExempt,
}

/// Stable arena reference to a class modifier.
pub type ClassModifierRef<'arena> = ArenaNode<'arena, ClassModifier<'arena>>;
|
||||||
|
|
||||||
|
/// Class declaration header: name, optional parent class, and modifiers.
///
/// NOTE(review): `modifiers` is a `std::vec::Vec`, unlike the `ArenaVec`
/// collections used everywhere else in this module — confirm whether that is
/// intentional or refactor leftover.
pub struct ClassDeclaration<'arena> {
    /// Declared class name.
    pub name: IdentifierToken,
    /// Parent class after `extends`, if any.
    pub parent: Option<IdentifierToken>,
    /// Header modifiers in source order.
    pub modifiers: Vec<ClassModifierRef<'arena>>,
}
|
||||||
|
|
||||||
|
// --- Class-level member declarations ---

/// Class-level `var` declaration.
#[derive(Debug)]
pub struct ClassVarDecl<'src, 'arena> {
    /// `var(<...>)` editor specifiers, e.g. `var(Display, "Advanced")`.
    /// Each item is an `ArenaNode`, so token locations are preserved.
    pub paren_specs: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,

    /// Variable modifiers like public/protected/private/static/const/...
    /// Stored as plain `VarModifier` values, order preserved.
    pub modifiers: ArenaVec<'arena, VarModifier>,

    /// Declared type (named, inline enum, or inline struct).
    pub type_spec: TypeSpecifierRef<'src, 'arena>,
    /// Comma-separated declarators sharing `type_spec`, e.g. `a, b = expr`.
    pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    /// Span of the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a class `var` declaration.
pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'arena>>;

/// Class-level constant declaration.
#[derive(Debug)]
pub struct ClassConstDecl<'src, 'arena> {
    /// Constant name.
    pub name: IdentifierToken,
    /// Literal value together with its token position.
    pub value: DeclarationLiteralRef<'src, 'arena>,
    /// Span of the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a class constant declaration.
pub type ClassConstDeclRef<'src, 'arena> = ArenaNode<'arena, ClassConstDecl<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// One top-level member of a class body.
pub enum ClassMember<'src, 'arena>
where
    'src: 'arena,
{
    /// Function or other callable definition.
    Function(CallableDefinitionRef<'src, 'arena>),
    /// Standalone `enum` type definition.
    TypeDefEnum(EnumDefRef<'src, 'arena>),
    /// Standalone `struct` type definition.
    TypeDefStruct(StructDefRef<'src, 'arena>),
    /// `var` declaration (see `ClassVarDecl`).
    Var(ClassVarDeclRef<'src, 'arena>),

    /// `replication` block (see `ReplicationBlock`).
    Replication(ReplicationBlockRef<'src, 'arena>),
    /// `state` declaration (see `StateDecl`).
    State(StateDeclRef<'src, 'arena>),
    /// `const` declaration (see `ClassConstDecl`).
    Const(ClassConstDeclRef<'src, 'arena>),
    /// Exec directive line (see `ExecDirective`).
    Exec(ExecDirectiveRef<'arena>),
}

/// Stable arena reference to a class member.
pub type ClassMemberRef<'src, 'arena> = ArenaNode<'arena, ClassMember<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Reliability qualifier on a replication rule.
#[derive(Clone, Copy, Debug)]
pub enum Reliability {
    /// `reliable`
    Reliable,
    /// `unreliable`
    Unreliable,
}

/// One rule inside a `replication` block.
#[derive(Debug)]
pub struct ReplicationRule<'src, 'arena> {
    /// `reliable` or `unreliable`.
    pub reliability: Reliability,
    /// Condition expression from `if (<expr>)`, or `None` when absent.
    pub condition: Option<ExpressionRef<'src, 'arena>>,
    /// Replicated members listed in this rule, e.g. `a, b, Foo()`.
    pub members: ArenaVec<'arena, IdentifierToken>,
    /// Span of the whole rule.
    pub span: AstSpan,
}
/// Stable arena reference to a replication rule.
pub type ReplicationRuleRef<'src, 'arena> = ArenaNode<'arena, ReplicationRule<'src, 'arena>>;

/// A `replication` block: a list of rules.
#[derive(Debug)]
pub struct ReplicationBlock<'src, 'arena> {
    /// Rules of the block.
    pub rules: ArenaVec<'arena, ReplicationRuleRef<'src, 'arena>>,
    /// Span of the whole block.
    pub span: AstSpan,
}
/// Stable arena reference to a replication block.
pub type ReplicationBlockRef<'src, 'arena> = ArenaNode<'arena, ReplicationBlock<'src, 'arena>>;
|
||||||
|
|
||||||
|
// ---------- States ----------

/// Modifier on a `state` declaration.
#[derive(Clone, Copy, Debug)]
pub enum StateModifier {
    /// `auto`
    Auto,
    /// `simulated`
    Simulated,
}

/// A `state` declaration.
#[derive(Debug)]
pub struct StateDecl<'src, 'arena> {
    /// Declared state name.
    pub name: IdentifierToken,
    /// Optional base state from `extends BaseState`.
    pub parent: Option<IdentifierToken>,
    /// State modifiers (`auto`, `simulated`), order preserved.
    pub modifiers: ArenaVec<'arena, StateModifier>,
    /// Names from an `ignores Foo, Bar;` clause, when present.
    pub ignores: Option<ArenaVec<'arena, IdentifierToken>>,
    /// Body: ordinary statements plus nested function definitions (see `Statement::Function`).
    pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>,
    /// Span of the whole declaration.
    pub span: AstSpan,
}
/// Stable arena reference to a state declaration.
pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Exec directive captured as raw text.
#[derive(Debug)]
pub struct ExecDirective<'arena> {
    /// Full directive line without trailing newline(s).
    pub text: ArenaString<'arena>,
    /// Span of the directive line.
    pub span: AstSpan,
}
/// Stable arena reference to an exec directive.
pub type ExecDirectiveRef<'arena> = ArenaNode<'arena, ExecDirective<'arena>>;
|
||||||
|
|
||||||
|
/// A fully parsed class: the `ClassDeclaration` header plus all body members.
pub struct ClassDefinition<'src, 'arena>
where
    'src: 'arena,
{
    /// Class header: name, parent, and header modifiers.
    pub header: ClassDeclaration<'arena>,
    /// Class body members.
    pub members: ArenaVec<'arena, ClassMemberRef<'src, 'arena>>,
}
|
||||||
268
rottlib/src/ast/operators.rs
Normal file
268
rottlib/src/ast/operators.rs
Normal file
@ -0,0 +1,268 @@
|
|||||||
|
//! Operator AST nodes.
|
||||||
|
//!
|
||||||
|
//! This module defines the prefix, postfix, and infix operator kinds used by
|
||||||
|
//! expression AST nodes.
|
||||||
|
//!
|
||||||
|
//! The enums here represent only the *syntactic operator category* recorded in
|
||||||
|
//! the AST. They do not encode precedence, associativity, overload behavior,
|
||||||
|
//! or token spelling details beyond the normalized operator kind itself.
|
||||||
|
//! Those concerns are handled by the expression parser and precedence tables.
|
||||||
|
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
|
||||||
|
use core::convert::TryFrom;
|
||||||
|
|
||||||
|
/// Prefix unary operators.
///
/// Conversion from lexer tokens lives in the `TryFrom<Token>` impl below.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PrefixOperator {
    /// Logical negation: `!expr`.
    Not,
    /// Arithmetic negation: `-expr`.
    Minus,
    /// Unary plus: `+expr`.
    Plus,
    /// Bitwise negation: `~expr`.
    BitwiseNot,
    /// Prefix increment: `++expr`.
    Increment,
    /// Prefix decrement: `--expr`.
    Decrement,
}
|
||||||
|
|
||||||
|
/// Postfix unary operators.
///
/// Conversion from lexer tokens lives in the `TryFrom<Token>` impl below.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum PostfixOperator {
    /// Postfix increment: `expr++`.
    Increment,
    /// Postfix decrement: `expr--`.
    Decrement,
}
|
||||||
|
|
||||||
|
/// Binary / infix operators.
///
/// These operators appear between left-hand side and right-hand side operands.
/// This enum stores only the normalized AST-level operator kind.
///
/// The parser assigns precedence and associativity separately (see
/// `infix_operator_info`).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum InfixOperator {
    // --- assignment operators ---
    /// Simple assignment: `left_hand_side = right_hand_side`.
    Assign,
    /// Multiplicative assignment: `left_hand_side *= right_hand_side`.
    MultiplyAssign,
    /// Division assignment: `left_hand_side /= right_hand_side`.
    DivideAssign,
    /// Modulo assignment: `left_hand_side %= right_hand_side`.
    ModuloAssign,
    /// Additive assignment: `left_hand_side += right_hand_side`.
    PlusAssign,
    /// Subtractive assignment: `left_hand_side -= right_hand_side`.
    MinusAssign,
    /// String concatenation assignment: `left_hand_side $= right_hand_side`.
    ConcatAssign,
    /// Space-concatenation assignment: `left_hand_side @= right_hand_side`.
    ConcatSpaceAssign,

    // --- string concatenation ---
    /// String concatenation without inserted whitespace:
    /// `left_hand_side $ right_hand_side`.
    Concat,
    /// String concatenation with an inserted space:
    /// `left_hand_side @ right_hand_side`.
    ConcatSpace,

    // --- logical operators ---
    /// Logical conjunction: `left_hand_side && right_hand_side`.
    And,
    /// Logical exclusive-or: `left_hand_side ^^ right_hand_side`.
    Xor,
    /// Logical disjunction: `left_hand_side || right_hand_side`.
    Or,

    // --- bitwise operators ---
    /// Bitwise AND: `left_hand_side & right_hand_side`.
    BitwiseAnd,
    /// Bitwise OR: `left_hand_side | right_hand_side`.
    BitwiseOr,
    /// Bitwise XOR: `left_hand_side ^ right_hand_side`.
    BitwiseXor,

    // --- comparisons ---
    /// Inequality test: `left_hand_side != right_hand_side`.
    NotEqual,
    /// Equality test: `left_hand_side == right_hand_side`.
    Equal,
    /// Approximate equality test: `left_hand_side ~= right_hand_side`.
    ApproximatelyEqual,
    /// Less-than comparison: `left_hand_side < right_hand_side`.
    Less,
    /// Less-than-or-equal comparison: `left_hand_side <= right_hand_side`.
    LessEqual,
    /// Greater-than comparison: `left_hand_side > right_hand_side`.
    Greater,
    /// Greater-than-or-equal comparison: `left_hand_side >= right_hand_side`.
    GreaterEqual,
    /// UnrealScript-specific directional comparison:
    /// `left_hand_side ClockwiseFrom right_hand_side`.
    ClockwiseFrom,

    // --- shifts ---
    /// Left shift: `left_hand_side << right_hand_side`.
    LeftShift,
    /// Logical right shift: `left_hand_side >>> right_hand_side`.
    LogicalRightShift,
    /// Arithmetic / ordinary right shift: `left_hand_side >> right_hand_side`.
    RightShift,

    // --- additive arithmetic ---
    /// Addition: `left_hand_side + right_hand_side`.
    Plus,
    /// Subtraction: `left_hand_side - right_hand_side`.
    Minus,

    // --- multiplicative arithmetic ---
    /// Remainder / modulo: `left_hand_side % right_hand_side`.
    Modulo,
    /// Multiplication: `left_hand_side * right_hand_side`.
    Multiply,
    /// Division: `left_hand_side / right_hand_side`.
    Divide,

    // --- vector operators ---
    /// Dot product: `left_hand_side Dot right_hand_side`.
    ///
    /// This is spelled as a keyword-level operator in source.
    Dot,
    /// Cross product: `left_hand_side Cross right_hand_side`.
    ///
    /// This is spelled as a keyword-level operator in source.
    Cross,

    /// Exponentiation: `left_hand_side ** right_hand_side`.
    Exponentiation,
}
|
||||||
|
|
||||||
|
/// A prefix operator together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PrefixOperatorName {
    /// Normalized operator kind.
    pub kind: PrefixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}

/// An infix operator together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct InfixOperatorName {
    /// Normalized operator kind.
    pub kind: InfixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}

/// A postfix operator together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct PostfixOperatorName {
    /// Normalized operator kind.
    pub kind: PostfixOperator,
    /// Position of the operator token in the source stream.
    pub position: TokenPosition,
}
|
||||||
|
|
||||||
|
impl TryFrom<Token> for PostfixOperator {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||||
|
use PostfixOperator::{Decrement, Increment};
|
||||||
|
|
||||||
|
match token {
|
||||||
|
Token::Increment => Ok(Increment),
|
||||||
|
Token::Decrement => Ok(Decrement),
|
||||||
|
_ => Err(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Token> for PrefixOperator {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||||
|
use PrefixOperator::{BitwiseNot, Decrement, Increment, Minus, Not, Plus};
|
||||||
|
|
||||||
|
match token {
|
||||||
|
Token::Not => Ok(Not),
|
||||||
|
Token::Minus => Ok(Minus),
|
||||||
|
Token::Plus => Ok(Plus),
|
||||||
|
Token::BitwiseNot => Ok(BitwiseNot),
|
||||||
|
Token::Increment => Ok(Increment),
|
||||||
|
Token::Decrement => Ok(Decrement),
|
||||||
|
_ => Err(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An infix operator paired with the precedence rank used when parsing its
/// right-hand side (see `infix_operator_info` for the table).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) struct InfixOperatorInfo {
    /// Normalized operator kind.
    pub operator: InfixOperator,
    /// Precedence rank for the operator's right-hand side.
    pub right_precedence_rank: u8,
}
|
||||||
|
|
||||||
|
/// Looks up the infix operator and its precedence rank for `token`.
///
/// Returns `None` when `token` is not an infix operator. The rank numbers
/// form the parser's precedence table; judging by the ordering, a lower rank
/// binds tighter (`**` at 12, `*`/`/` at 16, ..., assignments at 34,
/// concatenation at 40/44) — TODO confirm against the expression parser that
/// consumes `right_precedence_rank`.
pub(crate) const fn infix_operator_info(token: Token) -> Option<InfixOperatorInfo> {
    use InfixOperator::{
        And, ApproximatelyEqual, Assign, BitwiseAnd, BitwiseOr, BitwiseXor, ClockwiseFrom, Concat,
        ConcatAssign, ConcatSpace, ConcatSpaceAssign, Cross, Divide, DivideAssign, Dot, Equal,
        Exponentiation, Greater, GreaterEqual, LeftShift, Less, LessEqual, LogicalRightShift,
        Minus, MinusAssign, Modulo, ModuloAssign, Multiply, MultiplyAssign, NotEqual, Or, Plus,
        PlusAssign, RightShift, Xor,
    };

    let (precedence_rank, operator) = match token {
        Token::Exponentiation => (12, Exponentiation),

        Token::Multiply => (16, Multiply),
        Token::Divide => (16, Divide),
        Token::Keyword(Keyword::Cross) => (16, Cross),
        Token::Keyword(Keyword::Dot) => (16, Dot),

        Token::Modulo => (18, Modulo),

        Token::Plus => (20, Plus),
        Token::Minus => (20, Minus),

        Token::LeftShift => (22, LeftShift),
        Token::RightShift => (22, RightShift),
        Token::LogicalRightShift => (22, LogicalRightShift),

        Token::Less => (24, Less),
        Token::LessEqual => (24, LessEqual),
        Token::Greater => (24, Greater),
        Token::GreaterEqual => (24, GreaterEqual),
        Token::Equal => (24, Equal),
        Token::ApproximatelyEqual => (24, ApproximatelyEqual),
        Token::Keyword(Keyword::ClockwiseFrom) => (24, ClockwiseFrom),

        // Note: `!=` sits one level looser than `==` in this table.
        Token::NotEqual => (26, NotEqual),

        Token::BitwiseAnd => (28, BitwiseAnd),
        Token::BitwiseXor => (28, BitwiseXor),
        Token::BitwiseOr => (28, BitwiseOr),

        Token::LogicalAnd => (30, And),
        Token::LogicalXor => (30, Xor),

        Token::LogicalOr => (32, Or),

        Token::MultiplyAssign => (34, MultiplyAssign),
        Token::DivideAssign => (34, DivideAssign),
        Token::PlusAssign => (34, PlusAssign),
        Token::MinusAssign => (34, MinusAssign),
        Token::Assign => (34, Assign),
        Token::ModuloAssign => (34, ModuloAssign),

        Token::Concat => (40, Concat),
        Token::ConcatSpace => (40, ConcatSpace),

        Token::ConcatAssign => (44, ConcatAssign),
        Token::ConcatSpaceAssign => (44, ConcatSpaceAssign),

        _ => return None,
    };

    Some(InfixOperatorInfo {
        operator,
        right_precedence_rank: precedence_rank,
    })
}
|
||||||
|
|
||||||
|
impl TryFrom<Token> for InfixOperator {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||||
|
infix_operator_info(token)
|
||||||
|
.map(|info| info.operator)
|
||||||
|
.ok_or(())
|
||||||
|
}
|
||||||
|
}
|
||||||
277
rottlib/src/ast/types.rs
Normal file
277
rottlib/src/ast/types.rs
Normal file
@ -0,0 +1,277 @@
|
|||||||
|
//! Type-specifier and declaration AST nodes.
|
||||||
|
//!
|
||||||
|
//! This module defines syntactic forms used to represent type names, inline
|
||||||
|
//! type declarations, variable declarators, and declaration modifiers.
|
||||||
|
use super::{AstSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef};
|
||||||
|
|
||||||
|
use crate::arena::{ArenaNode, ArenaString, ArenaVec};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
|
||||||
|
use core::convert::TryFrom;
|
||||||
|
|
||||||
|
/// Type syntax used in declarations, fields, and other type-annotated grammar
/// positions.
///
/// This enum covers both named types and inline type-definition forms supported
/// by the language.
#[derive(Debug, PartialEq)]
pub enum TypeSpecifier<'src, 'arena> {
    /// Named type reference such as `EDrawType` or `Pkg.Group.Type`.
    Named(QualifiedIdentifierRef<'arena>),
    /// Inline enum definition used directly in type position.
    ///
    /// Example:
    /// `enum EMyKind { A, B, C }`
    InlineEnum(EnumDefRef<'src, 'arena>),
    /// Inline struct definition used directly in type position.
    ///
    /// Example:
    /// `struct SMyData { var int X; }`
    InlineStruct(StructDefRef<'src, 'arena>),
    /// Generic array type: `array<...>`.
    ///
    /// The parser currently allows a sequence of variable-style modifiers to
    /// appear before the inner type and preserves them here.
    Array {
        /// Modifiers parsed before the inner type inside `array<...>`.
        element_modifiers: ArenaVec<'arena, VarModifier>,
        /// Element / inner type.
        element_type: TypeSpecifierRef<'src, 'arena>,
    },
    /// `class` or `class<SomeType>`.
    ///
    /// `None` represents a bare `class` with no type argument.
    Class(Option<QualifiedIdentifierRef<'arena>>),
}

/// Stable arena reference to a type-specifier node.
pub type TypeSpecifierRef<'src, 'arena> = ArenaNode<'arena, TypeSpecifier<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Enum definition used either inline in a type position or elsewhere in the
/// declaration grammar.
#[derive(Debug, PartialEq, Eq)]
pub struct EnumDefinition<'arena> {
    /// Declared enum name.
    pub name: IdentifierToken,
    /// Enum variants in source order.
    pub variants: ArenaVec<'arena, IdentifierToken>,
}

/// Stable arena reference to an enum definition.
///
/// NOTE(review): the `'src` parameter is unused on the right-hand side; it is
/// presumably kept for signature symmetry with `StructDefRef` — confirm.
pub type EnumDefRef<'src, 'arena> = ArenaNode<'arena, EnumDefinition<'arena>>;
|
||||||
|
|
||||||
|
/// Struct-level modifier kind.
///
/// These are modifiers that apply to the struct declaration itself rather than
/// to an individual field.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum StructModifierKind {
    /// `native`
    Native,
    /// `export`
    Export,
    /// `noexport`
    NoExport,
    /// `transient`
    Transient,
    /// `deprecated`
    Deprecated,
    /// `init`
    Init,
    /// `long`
    Long,
}

/// Struct declaration modifier together with its source token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct StructModifier {
    /// Modifier kind.
    pub kind: StructModifierKind,
    /// Position of the modifier token in the source stream.
    pub position: TokenPosition,
}
|
||||||
|
|
||||||
|
impl StructModifier {
|
||||||
|
/// Span covering just this modifier token.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn span(self) -> AstSpan {
|
||||||
|
AstSpan::new(self.position)
|
||||||
|
}
|
||||||
|
/// Construct a struct modifier from kind and token position.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn new(kind: StructModifierKind, token: TokenPosition) -> Self {
|
||||||
|
Self {
|
||||||
|
kind,
|
||||||
|
position: token,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Struct field declaration.
///
/// A field stores the declared type together with one or more declarators
/// sharing that type, plus optional `var(...)` editor specifiers and ordinary
/// declaration modifiers.
#[derive(Debug, PartialEq)]
pub struct StructField<'src, 'arena> {
    /// Field type.
    pub type_specifier: TypeSpecifierRef<'src, 'arena>,
    /// One or more declarators declared with the same type.
    ///
    /// Examples:
    /// - `var int A;`
    /// - `var int A, B[4], C = 10;`
    pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    /// Optional `var(...)` editor specifiers attached to the field declaration.
    ///
    /// Example:
    /// `var(Display, "Advanced/Hidden")`
    pub editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
    /// Declaration modifiers attached to the field.
    ///
    /// These are preserved in source order.
    pub declaration_modifiers: ArenaVec<'arena, VarModifier>,
}

/// Stable arena reference to a struct field declaration.
pub type StructFieldRef<'src, 'arena> = ArenaNode<'arena, StructField<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// Struct definition used either inline in a type position or elsewhere in the
/// declaration grammar.
#[derive(Debug, PartialEq)]
pub struct StructDefinition<'src, 'arena> {
    /// Struct name, if present.
    ///
    /// Anonymous inline structs use `None`.
    pub name: Option<IdentifierToken>,
    /// Optional base struct after `extends`.
    pub base_type_name: Option<QualifiedIdentifierRef<'arena>>,
    /// Modifiers attached to the struct declaration itself (not to its fields).
    pub modifiers: ArenaVec<'arena, StructModifier>,
    /// Struct fields in source order.
    pub fields: ArenaVec<'arena, StructFieldRef<'src, 'arena>>,
}
|
||||||
|
|
||||||
|
/// Stable arena reference to a struct definition.
pub type StructDefRef<'src, 'arena> = ArenaNode<'arena, StructDefinition<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// One declared variable name together with optional array size and initializer.
///
/// This node represents one declarator inside a declaration that may contain
/// several comma-separated declarators sharing the same type.
#[derive(Debug, PartialEq)]
pub struct VariableDeclarator<'src, 'arena> {
    /// Declared variable name.
    pub name: IdentifierToken,
    /// Optional initializer after `=`.
    pub initializer: Option<ExpressionRef<'src, 'arena>>,
    /// Optional array-size expression from `[expr]`.
    pub array_size: Option<ExpressionRef<'src, 'arena>>,
}
|
||||||
|
|
||||||
|
/// Stable arena reference to a variable declarator.
///
/// The node span is expected to cover the entire declarator, not only the
/// identifier token.
pub type VariableDeclaratorRef<'src, 'arena> = ArenaNode<'arena, VariableDeclarator<'src, 'arena>>;
|
||||||
|
|
||||||
|
/// One item inside `var(...)` editor specifiers.
#[derive(Debug, PartialEq, Eq)]
pub enum VarEditorSpecifier<'arena> {
    /// Identifier-like editor specifier such as `Display` or `Advanced`.
    Identifier(IdentifierToken),
    /// String editor specifier such as `"Category/Sub"`.
    String(ArenaString<'arena>),
}
|
||||||
|
|
||||||
|
/// Stable arena reference to an editor specifier.
///
/// NOTE(review): the `'src` parameter is unused by the aliased type —
/// presumably kept so all `*Ref` aliases share the same signature; confirm.
pub type VarEditorSpecifierRef<'src, 'arena> = ArenaNode<'arena, VarEditorSpecifier<'arena>>;
|
||||||
|
|
||||||
|
/// Field / variable declaration modifier kind.
///
/// Each variant maps one-to-one from the [`Keyword`] variant of the same name
/// (see the `TryFrom<Keyword>` impl below); any other keyword is rejected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VarModifierKind {
    Transient,
    Public,
    Protected,
    Private,
    Static,
    Const,
    Deprecated,
    NoExport,
    Export,
    Config,
    Localized,
    GlobalConfig,
    PerObjectConfig,
    Input,
    EdFindable,
    EditConst,
    EditConstArray,
    EditInline,
    EditInlineUse,
    EditInlineNew,
    EditInlineNotify,
    NotEditInlineNew,
    Automated,
    Native,
    Travel,
    Cache,
}
|
||||||
|
|
||||||
|
/// Variable-style declaration modifier together with its token position.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarModifier {
    /// Modifier kind.
    pub kind: VarModifierKind,
    /// Position of the modifier token in the source stream.
    pub position: TokenPosition,
}
|
||||||
|
|
||||||
|
impl TryFrom<Keyword> for VarModifierKind {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from(keyword: Keyword) -> Result<Self, Self::Error> {
|
||||||
|
use VarModifierKind::{
|
||||||
|
Automated, Cache, Config, Const, Deprecated, EdFindable, EditConst, EditConstArray,
|
||||||
|
EditInline, EditInlineNew, EditInlineNotify, EditInlineUse, Export, GlobalConfig,
|
||||||
|
Input, Localized, Native, NoExport, NotEditInlineNew, PerObjectConfig, Private,
|
||||||
|
Protected, Public, Static, Transient, Travel,
|
||||||
|
};
|
||||||
|
|
||||||
|
let kind = match keyword {
|
||||||
|
Keyword::Transient => Transient,
|
||||||
|
Keyword::Public => Public,
|
||||||
|
Keyword::Protected => Protected,
|
||||||
|
Keyword::Private => Private,
|
||||||
|
Keyword::Static => Static,
|
||||||
|
Keyword::Const => Const,
|
||||||
|
Keyword::Deprecated => Deprecated,
|
||||||
|
Keyword::NoExport => NoExport,
|
||||||
|
Keyword::Export => Export,
|
||||||
|
Keyword::Config => Config,
|
||||||
|
Keyword::Localized => Localized,
|
||||||
|
Keyword::GlobalConfig => GlobalConfig,
|
||||||
|
Keyword::PerObjectConfig => PerObjectConfig,
|
||||||
|
Keyword::EdFindable => EdFindable,
|
||||||
|
Keyword::EditConst => EditConst,
|
||||||
|
Keyword::EditConstArray => EditConstArray,
|
||||||
|
Keyword::EditInline => EditInline,
|
||||||
|
Keyword::EditInlineUse => EditInlineUse,
|
||||||
|
Keyword::EditInlineNew => EditInlineNew,
|
||||||
|
Keyword::EditInlineNotify => EditInlineNotify,
|
||||||
|
Keyword::NotEditInlineNew => NotEditInlineNew,
|
||||||
|
Keyword::Automated => Automated,
|
||||||
|
Keyword::Native => Native,
|
||||||
|
Keyword::Input => Input,
|
||||||
|
Keyword::Travel => Travel,
|
||||||
|
Keyword::Cache => Cache,
|
||||||
|
_ => return Err(()),
|
||||||
|
};
|
||||||
|
Ok(kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<(Token, TokenPosition)> for VarModifier {
|
||||||
|
type Error = ();
|
||||||
|
|
||||||
|
fn try_from((token, position): (Token, TokenPosition)) -> Result<Self, Self::Error> {
|
||||||
|
let Token::Keyword(keyword) = token else {
|
||||||
|
return Err(());
|
||||||
|
};
|
||||||
|
let kind = VarModifierKind::try_from(keyword)?;
|
||||||
|
Ok(Self { kind, position })
|
||||||
|
}
|
||||||
|
}
|
||||||
190
rottlib/src/diagnostics/expression.rs
Normal file
190
rottlib/src/diagnostics/expression.rs
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
use super::{Diagnostic, DiagnosticBuilder};
|
||||||
|
use crate::ast::AstSpan;
|
||||||
|
use crate::lexer::TokenPosition;
|
||||||
|
use crate::parser::{ParseError, ParseErrorKind};
|
||||||
|
use std::convert::From;
|
||||||
|
|
||||||
|
fn diagnostic_parenthesized_expression_empty(
|
||||||
|
error: ParseError,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("empty parenthesized expression")
|
||||||
|
.primary_label(error.blame_span, "expected an expression before this `)`")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_parenthesis_position),
|
||||||
|
"parenthesized expression starts here",
|
||||||
|
)
|
||||||
|
.help("Remove the parentheses or put an expression inside them.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_class_type_missing_type_argument(
|
||||||
|
error: ParseError,
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("missing type argument in `class<...>`")
|
||||||
|
.primary_label(error.blame_span, "expected a type name here")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_angle_bracket_position),
|
||||||
|
"type argument list starts here",
|
||||||
|
)
|
||||||
|
.help("Write a type name, for example `class<Pawn>`.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_class_type_missing_closing_angle_bracket(
|
||||||
|
error: ParseError,
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("missing closing `>` in `class<...>`")
|
||||||
|
.primary_label(error.blame_span, "expected `>` here")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_angle_bracket_position),
|
||||||
|
"this `<` starts the type argument",
|
||||||
|
)
|
||||||
|
.help("Add `>` to close the class type expression.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_parenthesized_expression_missing_closing_parenthesis(
|
||||||
|
error: ParseError,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("missing closing `)`")
|
||||||
|
.primary_label(error.blame_span, "expected `)` here")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_parenthesis_position),
|
||||||
|
"this `(` starts the parenthesized expression",
|
||||||
|
)
|
||||||
|
.help("Add `)` to close the expression.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_expression_expected(error: ParseError) -> Diagnostic {
|
||||||
|
let mut builder = DiagnosticBuilder::error("expected expression")
|
||||||
|
.primary_label(error.blame_span, "this token cannot start an expression")
|
||||||
|
.help(
|
||||||
|
"Expressions can start with literals, identifiers, `(`, `{`, or expression keywords.",
|
||||||
|
);
|
||||||
|
|
||||||
|
if let Some(related_span) = error.related_span {
|
||||||
|
builder = builder.secondary_label(related_span, "expression context starts here");
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_class_type_invalid_type_argument(
|
||||||
|
error: ParseError,
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("invalid type argument in `class<...>`")
|
||||||
|
.primary_label(error.blame_span, "expected a qualified type name here")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_angle_bracket_position),
|
||||||
|
"type argument list starts here",
|
||||||
|
)
|
||||||
|
.note("Only a qualified type name is accepted between `<` and `>` here.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_new_too_many_arguments(
|
||||||
|
error: ParseError,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("too many arguments in `new(...)`")
|
||||||
|
.primary_label(error.blame_span, "unexpected extra argument")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_parenthesis_position),
|
||||||
|
"this argument list accepts at most three arguments",
|
||||||
|
)
|
||||||
|
.note("The three slots are `outer`, `name`, and `flags`.")
|
||||||
|
.help("Remove the extra argument.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_new_missing_closing_parenthesis(
|
||||||
|
error: ParseError,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
DiagnosticBuilder::error("missing closing `)` in `new(...)`")
|
||||||
|
.primary_label(error.blame_span, "expected `)` here")
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(left_parenthesis_position),
|
||||||
|
"this argument list starts here",
|
||||||
|
)
|
||||||
|
.help("Add `)` to close the argument list.")
|
||||||
|
.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn diagnostic_new_missing_class_specifier(
|
||||||
|
error: ParseError,
|
||||||
|
new_keyword_position: TokenPosition,
|
||||||
|
) -> Diagnostic {
|
||||||
|
let mut builder = DiagnosticBuilder::error("missing class specifier in `new` expression")
|
||||||
|
.primary_label(
|
||||||
|
error.blame_span,
|
||||||
|
"expected the class or expression to instantiate here",
|
||||||
|
)
|
||||||
|
.secondary_label(
|
||||||
|
AstSpan::new(new_keyword_position),
|
||||||
|
"`new` expression starts here",
|
||||||
|
)
|
||||||
|
.help("Add the class or expression to instantiate after `new` or `new(...)`.");
|
||||||
|
|
||||||
|
if let Some(related_span) = error.related_span {
|
||||||
|
builder = builder.secondary_label(related_span, "optional `new(...)` arguments end here");
|
||||||
|
}
|
||||||
|
|
||||||
|
builder.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ParseError> for Diagnostic {
|
||||||
|
fn from(error: ParseError) -> Self {
|
||||||
|
match error.kind {
|
||||||
|
ParseErrorKind::ParenthesizedExpressionEmpty {
|
||||||
|
left_parenthesis_position,
|
||||||
|
} => diagnostic_parenthesized_expression_empty(error, left_parenthesis_position),
|
||||||
|
|
||||||
|
ParseErrorKind::ClassTypeMissingTypeArgument {
|
||||||
|
left_angle_bracket_position,
|
||||||
|
} => diagnostic_class_type_missing_type_argument(error, left_angle_bracket_position),
|
||||||
|
|
||||||
|
ParseErrorKind::ClassTypeMissingClosingAngleBracket {
|
||||||
|
left_angle_bracket_position,
|
||||||
|
} => diagnostic_class_type_missing_closing_angle_bracket(
|
||||||
|
error,
|
||||||
|
left_angle_bracket_position,
|
||||||
|
),
|
||||||
|
|
||||||
|
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
|
||||||
|
left_parenthesis_position,
|
||||||
|
} => diagnostic_parenthesized_expression_missing_closing_parenthesis(
|
||||||
|
error,
|
||||||
|
left_parenthesis_position,
|
||||||
|
),
|
||||||
|
|
||||||
|
ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error),
|
||||||
|
|
||||||
|
ParseErrorKind::ClassTypeInvalidTypeArgument {
|
||||||
|
left_angle_bracket_position,
|
||||||
|
} => diagnostic_class_type_invalid_type_argument(error, left_angle_bracket_position),
|
||||||
|
|
||||||
|
ParseErrorKind::NewTooManyArguments {
|
||||||
|
left_parenthesis_position,
|
||||||
|
} => diagnostic_new_too_many_arguments(error, left_parenthesis_position),
|
||||||
|
|
||||||
|
ParseErrorKind::NewMissingClosingParenthesis {
|
||||||
|
left_parenthesis_position,
|
||||||
|
} => diagnostic_new_missing_closing_parenthesis(error, left_parenthesis_position),
|
||||||
|
|
||||||
|
ParseErrorKind::NewMissingClassSpecifier {
|
||||||
|
new_keyword_position,
|
||||||
|
} => diagnostic_new_missing_class_specifier(error, new_keyword_position),
|
||||||
|
|
||||||
|
_ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind))
|
||||||
|
.primary_label(error.covered_span, "happened here")
|
||||||
|
.build(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -4,6 +4,9 @@
|
|||||||
//! parsing or doing lightweight frontend checks. They are intentionally small,
|
//! parsing or doing lightweight frontend checks. They are intentionally small,
|
||||||
//! depend only on [`AstSpan`], and are easy to construct and store.
|
//! depend only on [`AstSpan`], and are easy to construct and store.
|
||||||
|
|
||||||
|
mod expression;
|
||||||
|
mod render;
|
||||||
|
|
||||||
use crate::ast::AstSpan;
|
use crate::ast::AstSpan;
|
||||||
|
|
||||||
/// Classification of a diagnostic by its impact.
|
/// Classification of a diagnostic by its impact.
|
||||||
@ -110,43 +113,51 @@ impl Diagnostic {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns `true` iff severity is [`Severity::Error`].
|
/// Returns `true` iff severity is [`Severity::Error`].
|
||||||
|
#[must_use]
|
||||||
pub fn stops_compilation(&self) -> bool {
|
pub fn stops_compilation(&self) -> bool {
|
||||||
self.severity == Severity::Error
|
self.severity == Severity::Error
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the diagnostic code if present.
|
/// Returns the diagnostic code if present.
|
||||||
///
|
///
|
||||||
/// See [DiagnosticBuilder::code] for code scheme.
|
/// See [`DiagnosticBuilder::code`] for code scheme.
|
||||||
|
#[must_use]
|
||||||
pub fn code(&self) -> Option<&str> {
|
pub fn code(&self) -> Option<&str> {
|
||||||
self.code.as_deref()
|
self.code.as_deref()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the primary label, if any.
|
/// Returns the primary label, if any.
|
||||||
pub fn primary_label(&self) -> Option<&Label> {
|
#[must_use]
|
||||||
|
pub const fn primary_label(&self) -> Option<&Label> {
|
||||||
self.primary_label.as_ref()
|
self.primary_label.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the secondary labels in insertion order.
|
/// Returns the secondary labels in insertion order.
|
||||||
|
#[must_use]
|
||||||
pub fn secondary_labels(&self) -> &[Label] {
|
pub fn secondary_labels(&self) -> &[Label] {
|
||||||
&self.secondary_labels
|
&self.secondary_labels
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the headline.
|
/// Returns the headline.
|
||||||
|
#[must_use]
|
||||||
pub fn headline(&self) -> &str {
|
pub fn headline(&self) -> &str {
|
||||||
&self.headline
|
&self.headline
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the severity.
|
/// Returns the severity.
|
||||||
pub fn severity(&self) -> Severity {
|
#[must_use]
|
||||||
|
pub const fn severity(&self) -> Severity {
|
||||||
self.severity
|
self.severity
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the notes.
|
/// Returns the notes.
|
||||||
|
#[must_use]
|
||||||
pub fn notes(&self) -> &[String] {
|
pub fn notes(&self) -> &[String] {
|
||||||
&self.notes
|
&self.notes
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the help message, if any.
|
/// Returns the help message, if any.
|
||||||
|
#[must_use]
|
||||||
pub fn help(&self) -> Option<&str> {
|
pub fn help(&self) -> Option<&str> {
|
||||||
self.help.as_deref()
|
self.help.as_deref()
|
||||||
}
|
}
|
||||||
491
rottlib/src/diagnostics/render.rs
Normal file
491
rottlib/src/diagnostics/render.rs
Normal file
@ -0,0 +1,491 @@
|
|||||||
|
use crate::ast::AstSpan;
|
||||||
|
use crate::diagnostics::{self, Diagnostic, Severity};
|
||||||
|
use crate::lexer::TokenizedFile;
|
||||||
|
|
||||||
|
use core::convert::Into;
|
||||||
|
use crossterm::style::Stylize;
|
||||||
|
use crossterm::terminal::disable_raw_mode;
|
||||||
|
use std::cmp::max;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
|
const INDENT: &str = " ";
|
||||||
|
const MAX_LINES_LIMIT: usize = 10;
|
||||||
|
|
||||||
|
/*
|
||||||
|
error: expected one of `,`, `:`, or `}`, found `token_to`
|
||||||
|
--> rottlib/src/ast/mod.rs:80:13
|
||||||
|
|
|
||||||
|
78 | Self {
|
||||||
|
| ---- while parsing this struct
|
||||||
|
79 | token_from: self.token_from,scd
|
||||||
|
| --- while parsing this struct field
|
||||||
|
80 | token_to: std::cmp::max(self.token_to, right_most_index),
|
||||||
|
| ^^^^^^^^ expected one of `,`, `:`, or `}`
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
|
||||||
|
76 | / "asdasdas
|
||||||
|
77 | | asd1
|
||||||
|
78 | | asd2
|
||||||
|
79 | | asdasd"
|
||||||
|
| |___________________^ expected `()`, found `&str`
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
1. Get each span's range and total lines covered by spans as ranges;
|
||||||
|
2. We need `+N` more lines for `N` labels;
|
||||||
|
3.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// These are abstract rendering events, not self-contained draw commands.
// They are emitted in increasing order of "significant lines" (range
// starts/ends). The actual source span for a label is recovered later from
// its LabelType; `column` is the vertical gutter lane assigned to the label.
#[derive(PartialEq, Eq, Clone, Copy)]
enum RendererCommands {
    /// A multi-line label begins on this line; it occupies gutter `column`.
    StartRange {
        label_type: LabelType,
        column: usize,
    },
    /// A multi-line label ends on this line, freeing gutter `column`.
    FinishRange {
        label_type: LabelType,
        column: usize,
    },
    /// A label that starts and ends on the same line (no gutter lane needed).
    SingleRange {
        label_type: LabelType,
    },
}
|
||||||
|
|
||||||
|
/// What to print in the line-number gutter column.
enum LineIndexType {
    /// A real zero-based source line index (rendered one-based).
    Normal(usize),
    /// No number at all — used for label/underline rows.
    Missing,
    /// A literal `...` marking skipped context lines.
    Ellipsis,
}
|
||||||
|
|
||||||
|
// Label ordering is semantic: primary first, then secondaries in diagnostic
// order. That order is also used to break visual ties when multiple labels
// would otherwise start or end on the same source line.
#[derive(PartialEq, Eq, Hash, Clone, Copy)]
enum LabelType {
    /// The diagnostic's primary label.
    Primary,
    /// A secondary label, indexed by its position in the diagnostic's
    /// secondary-label list (but see `make_ranges`: labels whose span maps to
    /// no line range are skipped, so indices may be offset).
    Secondary(usize),
}
|
||||||
|
|
||||||
|
/// Line ranges covered by a diagnostic's labels: at most one primary range
/// plus any number of secondary ranges, all as inclusive source-line spans.
struct RangeSet {
    // `None` when the primary label is absent or maps to no valid line range.
    primary_range: Option<RangeInclusive<usize>>,
    secondary_ranges: Vec<RangeInclusive<usize>>,
}
|
||||||
|
|
||||||
|
impl RangeSet {
|
||||||
|
fn get(&self, index: usize) -> Option<&RangeInclusive<usize>> {
|
||||||
|
if self.primary_range.is_some() {
|
||||||
|
if index == 0 {
|
||||||
|
return self.primary_range.as_ref();
|
||||||
|
} else {
|
||||||
|
self.secondary_ranges.get(index - 1)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.secondary_ranges.get(index)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn len(&self) -> usize {
|
||||||
|
self.secondary_ranges.len() + if self.primary_range.is_some() { 1 } else { 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iter(&self) -> impl Iterator<Item = &RangeInclusive<usize>> {
|
||||||
|
self.primary_range
|
||||||
|
.iter()
|
||||||
|
.chain(self.secondary_ranges.iter())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn iter_labeled(&self) -> impl Iterator<Item = (LabelType, &RangeInclusive<usize>)> {
|
||||||
|
self.primary_range
|
||||||
|
.iter()
|
||||||
|
.map(|range| (LabelType::Primary, range))
|
||||||
|
.chain(
|
||||||
|
self.secondary_ranges
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(index, range)| (LabelType::Secondary(index), range)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_first_bound_above(&self, line_number: Option<usize>) -> Option<usize> {
|
||||||
|
self.iter()
|
||||||
|
.filter_map(|range| {
|
||||||
|
let start = *range.start();
|
||||||
|
let end = *range.end();
|
||||||
|
|
||||||
|
let start_ok = line_number.is_none_or(|n| start > n).then_some(start);
|
||||||
|
let end_ok = line_number.is_none_or(|n| end > n).then_some(end);
|
||||||
|
|
||||||
|
match (start_ok, end_ok) {
|
||||||
|
(Some(a), Some(b)) => Some(a.min(b)),
|
||||||
|
(Some(a), None) => Some(a),
|
||||||
|
(None, Some(b)) => Some(b),
|
||||||
|
(None, None) => None,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.min()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Converts labeled line ranges into an ordered stream of renderer events.
//
// Important invariants:
//
// 1. Commands are ordered by increasing significant line.
//    A significant line is any line on which some label starts or ends.
//
// 2. If multiple labels would visually terminate on the same source line,
//    the renderer treats them as ending on distinct phantom rows, ordered by
//    diagnostic priority (primary/secondary order). This prevents
//    intersections and means that same-line closings are intentionally
//    linearized rather than treated as a geometric tie.
//
// 3. RendererCommands do not store source line numbers directly.
//    Later rendering recovers the underlying span from LabelType and uses the
//    event order to know when labels become active/inactive.
//
// 4. When a label starts on the same significant line where another label
//    ends, starts are processed first. This is intentional: longer-lived /
//    opening labels must occupy earlier columns so that shorter-lived /
//    closing labels bend around them without intersecting.
fn make_renderer_commands(ranges: RangeSet) -> Vec<(usize, RendererCommands)> {
    // Maps currently-open labels to the index of their StartRange command so
    // that we can patch in the final column once the label closes.
    let mut open_ranges = HashMap::new();
    let mut commands = Vec::new();
    // Walk significant lines in increasing order; `None` means "start from
    // the very first bound".
    let mut current_line = None;
    while let Some(next_significant_line) = ranges.get_first_bound_above(current_line) {
        current_line = Some(next_significant_line);
        // First process all new ranges because they'll live longer and have
        // to have earlier columns. One-line ranges become SingleRange and
        // never enter `open_ranges`.
        for (label, range) in ranges.iter_labeled() {
            if *range.start() == next_significant_line {
                if range.start() != range.end() {
                    commands.push((
                        *range.start(),
                        RendererCommands::StartRange {
                            label_type: label,
                            // Placeholder; the real column is patched in on close.
                            column: 0,
                        },
                    ));
                    open_ranges.insert(label, commands.len() - 1);
                } else {
                    commands.push((
                        *range.start(),
                        RendererCommands::SingleRange { label_type: label },
                    ));
                }
            }
        }

        // Closing pass.
        // The assigned column is the number of ranges that remain open after
        // removing this label. Because same-line visual ties are already
        // linearized by label priority / phantom rows, processing labels in
        // iter_labeled() order is intentional here.
        for (label, range) in ranges.iter_labeled() {
            if *range.end() == next_significant_line {
                if let Some(index) = open_ranges.remove(&label) {
                    // Column meaning:
                    //   0 = outermost / earliest lane
                    //   larger values = further inward lanes
                    //
                    // We assign the column at close time, not at open time,
                    // because the final lane depends on which other ranges
                    // outlive this one.
                    let column = open_ranges.len();
                    // Patch the matching StartRange with the now-known column.
                    if let Some((line_number, RendererCommands::StartRange { .. })) =
                        commands.get(index)
                    {
                        commands[index] = (
                            *line_number,
                            RendererCommands::StartRange {
                                label_type: label,
                                column,
                            },
                        );
                    }
                    commands.push((
                        *range.end(),
                        RendererCommands::FinishRange {
                            label_type: label,
                            column,
                        },
                    ));
                }
            }
        }
    }
    commands
}
|
||||||
|
|
||||||
|
fn max_line_number_width(ranges: &RangeSet) -> usize {
|
||||||
|
let max_line = ranges.iter().map(|range| *range.end()).max().unwrap_or(0);
|
||||||
|
|
||||||
|
if max_line == 0 {
|
||||||
|
1
|
||||||
|
} else {
|
||||||
|
max_line.ilog10() as usize + 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Maps an AST span onto the inclusive range of source lines it covers.
///
/// Returns `None` when either endpoint has no known line, or when the
/// endpoints are out of order.
fn span_to_range<'src>(span: AstSpan, file: &TokenizedFile<'src>) -> Option<RangeInclusive<usize>> {
    let first_line = file.token_line(span.token_from)?;
    let last_line = file.token_line(span.token_to)?;
    (first_line <= last_line).then(|| first_line..=last_line)
}
|
||||||
|
|
||||||
|
fn make_ranges<'src>(file: &TokenizedFile<'src>, diagnostic: &Diagnostic) -> RangeSet {
|
||||||
|
let mut result = RangeSet {
|
||||||
|
primary_range: None,
|
||||||
|
secondary_ranges: Vec::new(),
|
||||||
|
};
|
||||||
|
result.primary_range = diagnostic
|
||||||
|
.primary_label()
|
||||||
|
.and_then(|label| span_to_range(label.span, file));
|
||||||
|
for secondary in diagnostic.secondary_labels() {
|
||||||
|
if let Some(range) = span_to_range(secondary.span, file) {
|
||||||
|
result.secondary_ranges.push(range);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Diagnostic {
|
||||||
|
/// Renders the full diagnostic to stdout: header, file name, then the
/// annotated source lines.
pub fn render<'src>(&self, file: &TokenizedFile<'src>, file_path: impl Into<String>) {
    self.render_header();
    let path: String = file_path.into();
    println!("{INDENT}{}: {}", "in file".blue().bold(), path);
    self.render_lines(file);
}
|
||||||
|
/*StartRange {
|
||||||
|
label_type: LabelType,
|
||||||
|
column: usize,
|
||||||
|
},
|
||||||
|
FinishRange {
|
||||||
|
label_type: LabelType,
|
||||||
|
},
|
||||||
|
SingleRange {
|
||||||
|
label_type: LabelType,
|
||||||
|
}, */
|
||||||
|
fn label_data(&self, label_type: LabelType) -> Option<(AstSpan, String)> {
|
||||||
|
match label_type {
|
||||||
|
LabelType::Primary => self
|
||||||
|
.primary_label()
|
||||||
|
.map(|label| (label.span, label.message.clone())),
|
||||||
|
LabelType::Secondary(id) => Some((
|
||||||
|
self.secondary_labels()[id].span,
|
||||||
|
self.secondary_labels()[id].message.clone(),
|
||||||
|
)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
fn render_lines<'src>(&self, file: &TokenizedFile<'src>) {
|
||||||
|
let ranges = make_ranges(file, &self);
|
||||||
|
let max_line_number_width = max(max_line_number_width(&ranges), 3);
|
||||||
|
let commands = make_renderer_commands(ranges);
|
||||||
|
let mut max_column = 0;
|
||||||
|
for command in &commands {
|
||||||
|
if let (_, RendererCommands::StartRange { column, .. }) = command {
|
||||||
|
max_column = max(max_column, *column);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let mut vertical_stack = Vec::new();
|
||||||
|
vertical_stack.resize(max_column + 1, None);
|
||||||
|
|
||||||
|
let mut i = 0;
|
||||||
|
while i < commands.len() {
|
||||||
|
let mut current_line = commands[i].0;
|
||||||
|
let mut single_commands = Vec::new();
|
||||||
|
let mut start_commands = Vec::new();
|
||||||
|
let mut finish_commands = Vec::new();
|
||||||
|
while i < commands.len() && current_line == commands[i].0 {
|
||||||
|
match commands[i].1 {
|
||||||
|
RendererCommands::SingleRange { label_type } => {
|
||||||
|
single_commands.push(label_type)
|
||||||
|
}
|
||||||
|
RendererCommands::StartRange { label_type, column } => {
|
||||||
|
start_commands.push((label_type, column));
|
||||||
|
}
|
||||||
|
RendererCommands::FinishRange { label_type, column } => {
|
||||||
|
finish_commands.push((label_type, column))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
// !!!!!!!!!!!!!!!!
|
||||||
|
// First - update line drawing stack
|
||||||
|
for (label_type, column) in start_commands {
|
||||||
|
vertical_stack[column] = Some(label_type);
|
||||||
|
}
|
||||||
|
// Next - draw the line
|
||||||
|
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
|
||||||
|
for label_type in single_commands {
|
||||||
|
self.render_single_command(
|
||||||
|
label_type,
|
||||||
|
max_line_number_width,
|
||||||
|
file,
|
||||||
|
&vertical_stack,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Next - render finish commands (drop for now)
|
||||||
|
for (label_type, column) in finish_commands {
|
||||||
|
self.render_single_command(
|
||||||
|
label_type,
|
||||||
|
max_line_number_width,
|
||||||
|
file,
|
||||||
|
&vertical_stack,
|
||||||
|
);
|
||||||
|
vertical_stack[column] = None;
|
||||||
|
}
|
||||||
|
// !!!!!!!!!!!!!!!!
|
||||||
|
// Render some more lines
|
||||||
|
let mut countdown = 3;
|
||||||
|
current_line += 1;
|
||||||
|
while current_line < commands[i].0 {
|
||||||
|
if countdown == 0 {
|
||||||
|
if current_line + 1 == commands[i].0 {
|
||||||
|
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
|
||||||
|
} else {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
self.make_line_prefix(
|
||||||
|
LineIndexType::Ellipsis,
|
||||||
|
max_line_number_width,
|
||||||
|
&vertical_stack
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
self.draw_line(current_line, max_line_number_width, file, &vertical_stack);
|
||||||
|
}
|
||||||
|
current_line += 1;
|
||||||
|
countdown -= 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_single_command<'src>(
|
||||||
|
&self,
|
||||||
|
label_type: LabelType,
|
||||||
|
max_line_number_width: usize,
|
||||||
|
file: &TokenizedFile<'src>,
|
||||||
|
vertical_stack: &[Option<LabelType>],
|
||||||
|
) {
|
||||||
|
let Some((span, message)) = self.label_data(label_type) else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
let Some(visible) = file.span_visible_on_line(span) else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut builder = self.make_line_prefix(
|
||||||
|
LineIndexType::Missing,
|
||||||
|
max_line_number_width,
|
||||||
|
vertical_stack,
|
||||||
|
);
|
||||||
|
|
||||||
|
builder.push_str(&" ".repeat(visible.columns.start));
|
||||||
|
|
||||||
|
let underline_width = (visible.columns.end - visible.columns.start).max(1);
|
||||||
|
let mut underline_label = "^".repeat(underline_width);
|
||||||
|
underline_label.push_str(&format!(" {}", message));
|
||||||
|
|
||||||
|
match label_type {
|
||||||
|
LabelType::Primary => {
|
||||||
|
if self.severity == Severity::Error {
|
||||||
|
builder.push_str(&underline_label.red().bold().to_string());
|
||||||
|
} else {
|
||||||
|
builder.push_str(&underline_label.yellow().bold().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LabelType::Secondary(_) => {
|
||||||
|
builder.push_str(&underline_label.blue().bold().to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("{builder}");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn draw_line<'src>(
|
||||||
|
&self,
|
||||||
|
current_line: usize,
|
||||||
|
max_line_number_width: usize,
|
||||||
|
file: &TokenizedFile<'src>,
|
||||||
|
vertical_stack: &[Option<LabelType>],
|
||||||
|
) {
|
||||||
|
println!(
|
||||||
|
"{}{}",
|
||||||
|
self.make_line_prefix(
|
||||||
|
LineIndexType::Normal(current_line),
|
||||||
|
max_line_number_width,
|
||||||
|
vertical_stack
|
||||||
|
),
|
||||||
|
file.line_text(current_line).unwrap_or_default()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_line_prefix<'src>(
|
||||||
|
&self,
|
||||||
|
current_line: LineIndexType,
|
||||||
|
max_line_number_width: usize,
|
||||||
|
vertical_stack: &[Option<LabelType>],
|
||||||
|
) -> String {
|
||||||
|
let line_text = match current_line {
|
||||||
|
LineIndexType::Normal(current_line) => (current_line + 1).to_string(),
|
||||||
|
LineIndexType::Missing => "".to_string(),
|
||||||
|
LineIndexType::Ellipsis => "...".to_string(),
|
||||||
|
};
|
||||||
|
let line_padding = " ".repeat(max_line_number_width - line_text.len());
|
||||||
|
let mut builder = format!(" {}{} | ", line_padding, line_text)
|
||||||
|
.blue()
|
||||||
|
.bold()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
for vertical_line in vertical_stack {
|
||||||
|
if let Some(label) = vertical_line {
|
||||||
|
let piece = match label {
|
||||||
|
LabelType::Primary => {
|
||||||
|
if self.severity == Severity::Error {
|
||||||
|
" |".red()
|
||||||
|
} else {
|
||||||
|
" |".yellow()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LabelType::Secondary(_) => " |".blue(),
|
||||||
|
}
|
||||||
|
.to_string();
|
||||||
|
builder.push_str(&piece);
|
||||||
|
} else {
|
||||||
|
builder.push_str(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder
|
||||||
|
}
|
||||||
|
|
||||||
|
fn render_header(&self) {
|
||||||
|
let severity_label = match self.severity {
|
||||||
|
Severity::Error => "error".red(),
|
||||||
|
Severity::Warning => "warning".yellow(),
|
||||||
|
};
|
||||||
|
if let Some(ref code) = self.code {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
format!("{}[{}]: {}", severity_label, code, self.headline).bold()
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
println!(
|
||||||
|
"{}",
|
||||||
|
format!("{}: {}", severity_label, self.headline).bold()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,83 +0,0 @@
|
|||||||
//! Debug-only helpers for [`TokenizedFile`]
|
|
||||||
//!
|
|
||||||
//! This module is **compiled only if**
|
|
||||||
//!
|
|
||||||
//! * the current build profile has `debug_assertions` enabled, or
|
|
||||||
//! * the crate is built with the `debug` cargo feature.
|
|
||||||
//!
|
|
||||||
//! These checks have been moved to the parent module.
|
|
||||||
|
|
||||||
/// A technical trait that adds debug helpers to the lexer.
|
|
||||||
pub trait DebugTools {
|
|
||||||
/// Pretty-prints the internal layout of the tokenised file - useful when
|
|
||||||
/// writing new passes or hunting lexer bugs.
|
|
||||||
///
|
|
||||||
/// This method writes the layout directly to standard output.
|
|
||||||
///
|
|
||||||
/// The format is unspecified, may change, and is not intended for
|
|
||||||
/// external tools.
|
|
||||||
///
|
|
||||||
/// Each line in the printed layout starts with its 0-based number for
|
|
||||||
/// convenience.
|
|
||||||
fn dump_debug_layout(&self);
|
|
||||||
|
|
||||||
/// Reconstructs the exact, lossless source text that was fed to
|
|
||||||
/// [`super::TokenizedFile::from_source`] from internal representation -
|
|
||||||
/// useful for manually verifying that the lexer works.
|
|
||||||
fn reconstruct_source(&self) -> String;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src> DebugTools for super::TokenizedFile<'src> {
|
|
||||||
fn reconstruct_source(&self) -> String {
|
|
||||||
self.buffer.iter().map(|span| span.lexeme).collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn dump_debug_layout(&self) {
|
|
||||||
for (row_idx, line) in self.lines.iter().enumerate() {
|
|
||||||
println!("Line {}", row_idx + 1);
|
|
||||||
|
|
||||||
match (line.continued_from, line.local_range()) {
|
|
||||||
// Stand-alone line (all tokens start here)
|
|
||||||
(None, Some(range)) => {
|
|
||||||
println!("\t[Standalone]");
|
|
||||||
dump_spans(&self.buffer[range.clone()]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pure continuation - the only thing on this line is
|
|
||||||
// the remainder of a multi-line token that started earlier.
|
|
||||||
(Some(origin_row), None) => {
|
|
||||||
println!(
|
|
||||||
"\t[Continued from line {} - no new tokens here]",
|
|
||||||
origin_row + 1
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Continuation **plus** some fresh tokens that begin here.
|
|
||||||
(Some(origin_row), Some(range)) => {
|
|
||||||
println!("\t[Continued from line {} + new tokens]", origin_row + 1);
|
|
||||||
dump_spans(&self.buffer[range.clone()]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// An empty physical line (should be rare, but let's be safe).
|
|
||||||
(None, None) => {
|
|
||||||
println!("\t[Empty line]");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper that prints every span in `spans` together with its UTF-16
|
|
||||||
/// column boundaries.
|
|
||||||
fn dump_spans<'src>(spans: &[super::TokenPiece<'src>]) {
|
|
||||||
let mut col_utf16 = 0usize;
|
|
||||||
for span in spans {
|
|
||||||
let start = col_utf16;
|
|
||||||
let end = start + span.length_utf16;
|
|
||||||
println!(
|
|
||||||
"\t\t{:?} @ {}-{}: {:?}",
|
|
||||||
span.token, start, end, span.lexeme
|
|
||||||
);
|
|
||||||
col_utf16 = end;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,200 +0,0 @@
|
|||||||
//! Sub-module that adds an iterator to [`TokenizedFile`] which yields tokens in
|
|
||||||
//! the order they appear in the source code.
|
|
||||||
//!
|
|
||||||
//! ## Examples
|
|
||||||
//!
|
|
||||||
//! ```rust
|
|
||||||
//! let iter = TokenizedFile::from_str("0 / 0").tokens().without_whitespace();
|
|
||||||
//! ```
|
|
||||||
//!
|
|
||||||
//! ## Terminology: continued tokens
|
|
||||||
//!
|
|
||||||
//! Some [`super::Token`]s (e.g. [`super::Token::CppText`] or
|
|
||||||
//! [`super::Token::BlockComment`] can span multiple lines and are recorded on
|
|
||||||
//! every line on which they appear (usually as the first, and sometimes
|
|
||||||
//! the only, token).
|
|
||||||
//! In this module these are referred to as "continued" or
|
|
||||||
//! "carried-over" tokens.
|
|
||||||
//! Since our iterator needs to return each token only once, we take special
|
|
||||||
//! care to skip such continued tokens during iteration.
|
|
||||||
|
|
||||||
use super::{TokenLocation, TokenPiece, TokenizedFile};
|
|
||||||
|
|
||||||
/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving
|
|
||||||
/// their order of appearance in the original source file.
|
|
||||||
///
|
|
||||||
/// After exhaustion it keeps returning [`None`].
|
|
||||||
#[must_use]
|
|
||||||
#[derive(Clone, Debug)]
|
|
||||||
pub struct Tokens<'src> {
|
|
||||||
/// [`TokenLocation`] of the next token to be returned.
|
|
||||||
cursor: TokenLocation,
|
|
||||||
/// [`TokenizedFile`] whose tokens we're iterating over.
|
|
||||||
source_file: &'src TokenizedFile<'src>,
|
|
||||||
/// When `true`, whitespace tokens are skipped.
|
|
||||||
skip_whitespace: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Because we can only return [`None`] after we've returned it once.
|
|
||||||
impl<'src> std::iter::FusedIterator for Tokens<'src> {}
|
|
||||||
|
|
||||||
impl<'src> Tokens<'src> {
|
|
||||||
/// Makes the iterator skip all whitespace tokens.
|
|
||||||
#[must_use]
|
|
||||||
#[inline]
|
|
||||||
pub fn without_whitespace(mut self) -> Self {
|
|
||||||
self.skip_whitespace = true;
|
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns the position of the next new token, skipping carried-over pieces
|
|
||||||
// and blank lines.
|
|
||||||
fn advance_position(&self, position: TokenLocation) -> TokenLocation {
|
|
||||||
let TokenLocation::Position {
|
|
||||||
mut line,
|
|
||||||
mut column,
|
|
||||||
} = position
|
|
||||||
else {
|
|
||||||
return TokenLocation::EndOfFile;
|
|
||||||
};
|
|
||||||
if let Some(current_line) = self.source_file.lines.get(line) {
|
|
||||||
// `Line::len()` also counts a possible token that continued from
|
|
||||||
// the previous line.
|
|
||||||
if column + 1 < current_line.len() {
|
|
||||||
column += 1;
|
|
||||||
return TokenLocation::Position { line, column };
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Current line is exhausted: walk downward until we find the first line
|
|
||||||
// that **owns local tokens**, because we only want *new* token,
|
|
||||||
// not continued from previous lines (they were already iterated over).
|
|
||||||
line += 1;
|
|
||||||
while let Some(next_line) = self.source_file.lines.get(line) {
|
|
||||||
if next_line.local_range().is_some() {
|
|
||||||
// Start at the first *local* token,
|
|
||||||
// skipping any carried-over one
|
|
||||||
column = if next_line.continued_from.is_some() {
|
|
||||||
1
|
|
||||||
} else {
|
|
||||||
0
|
|
||||||
};
|
|
||||||
return TokenLocation::Position { line, column };
|
|
||||||
}
|
|
||||||
line += 1; // keep skipping empty / pure-carried lines
|
|
||||||
}
|
|
||||||
// No more tokens.
|
|
||||||
TokenLocation::EndOfFile
|
|
||||||
}
|
|
||||||
|
|
||||||
// Creates a new iterator.
|
|
||||||
fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
|
|
||||||
let mut new_iterator = Tokens {
|
|
||||||
source_file,
|
|
||||||
cursor: TokenLocation::Position { line: 0, column: 0 },
|
|
||||||
skip_whitespace: false,
|
|
||||||
};
|
|
||||||
// We need to land on the first existing token so [`Iterator::next`]
|
|
||||||
// can assume cursor is valid.
|
|
||||||
while new_iterator.cursor != TokenLocation::EndOfFile {
|
|
||||||
if new_iterator.source_file.get(new_iterator.cursor).is_some() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
|
|
||||||
}
|
|
||||||
new_iterator
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src> Iterator for Tokens<'src> {
|
|
||||||
type Item = (TokenLocation, TokenPiece<'src>);
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
// We only ever loop to discard whitespaces when the flag is on
|
|
||||||
while self.cursor != TokenLocation::EndOfFile {
|
|
||||||
let token_location = self.cursor;
|
|
||||||
let token_piece = *self.source_file.get(self.cursor)?;
|
|
||||||
self.cursor = self.advance_position(self.cursor);
|
|
||||||
|
|
||||||
// Optional whitespace-skip
|
|
||||||
if !self.skip_whitespace || !token_piece.token.is_whitespace() {
|
|
||||||
return Some((token_location, token_piece));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src> TokenizedFile<'src> {
|
|
||||||
// Returns the final local token in `line_number`
|
|
||||||
// (used to resolve column 0 of a continued line).
|
|
||||||
fn last_piece_in_line(&self, line_number: usize) -> Option<&TokenPiece> {
|
|
||||||
self.lines
|
|
||||||
.get(line_number)
|
|
||||||
.and_then(|line| line.local_range())
|
|
||||||
// `Line::local_range()` is guaranteed to return non-empty `Range`.
|
|
||||||
.and_then(|range| self.buffer.get(range.end - 1))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns [`TokenPiece`] at a given location if it exists.
|
|
||||||
///
|
|
||||||
/// If the line specified by [`TokenLocation`] starts with a token that
|
|
||||||
/// continues from the previous line - column `0` refers to that token.
|
|
||||||
///
|
|
||||||
/// Never panics, invalid position returns [`None`].
|
|
||||||
///
|
|
||||||
/// ## Examples
|
|
||||||
///
|
|
||||||
/// ```rust
|
|
||||||
/// use super::{TokenizedFile, TokenLocation, Token};
|
|
||||||
/// let file = TokenizedFile::from_str("0 / 0");
|
|
||||||
/// assert_eq!(
|
|
||||||
/// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),
|
|
||||||
/// Some(Token::Divide),
|
|
||||||
/// );
|
|
||||||
/// ```
|
|
||||||
#[track_caller]
|
|
||||||
pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
|
|
||||||
let TokenLocation::Position { line, column } = position else {
|
|
||||||
return None;
|
|
||||||
};
|
|
||||||
let line = self.lines.get(line)?;
|
|
||||||
let column = column;
|
|
||||||
if column >= line.len() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
if let Some(spanned_line_number) = line.continued_from
|
|
||||||
&& column == 0
|
|
||||||
{
|
|
||||||
self.last_piece_in_line(spanned_line_number)
|
|
||||||
} else {
|
|
||||||
// If we have a token that continued from the previous line,
|
|
||||||
// then, relative to `self.buffer`, our `column` is actually 1-based
|
|
||||||
// and we need to shift it back to being 0-based.
|
|
||||||
let token_position =
|
|
||||||
line.local_range.start + column - if line.continued_from.is_some() { 1 } else { 0 };
|
|
||||||
self.buffer.get(token_position)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns an iterator over all contained tokens in the order they appear
|
|
||||||
/// in the original source file.
|
|
||||||
///
|
|
||||||
/// By default includes all tokens, including whitespace and comments.
|
|
||||||
///
|
|
||||||
/// Returns the same iterator as [`TokenizedFile::into_iter`]
|
|
||||||
#[must_use]
|
|
||||||
#[inline]
|
|
||||||
pub fn tokens(&'src self) -> Tokens<'src> {
|
|
||||||
Tokens::new(self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'src> IntoIterator for &'src TokenizedFile<'src> {
|
|
||||||
type Item = (TokenLocation, TokenPiece<'src>);
|
|
||||||
type IntoIter = Tokens<'src>;
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn into_iter(self) -> Self::IntoIter {
|
|
||||||
self.tokens()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,526 +0,0 @@
|
|||||||
//! Lexer for UnrealScript that understands inline `cpptext { ... }` blocks.
|
|
||||||
//!
|
|
||||||
//! ## Notable details
|
|
||||||
//!
|
|
||||||
//! Lexer for UnrealScript that recognizes inline `cpptext { ... }` blocks.
|
|
||||||
//!
|
|
||||||
//! In UnrealScript, `cpptext` lets authors embed raw C++ between braces.
|
|
||||||
//! Because whitespace, newlines, or comments may appear between the
|
|
||||||
//! `cpptext` keyword and the opening `{`, the lexer must remember that
|
|
||||||
//! it has just seen `cpptext` - hence a state machine.
|
|
||||||
//!
|
|
||||||
//! ## Modes
|
|
||||||
//!
|
|
||||||
//! - **Normal** - ordinary UnrealScript tokens.
|
|
||||||
//! - **AwaitingCppBlock** - after `cpptext`, waiting for the next `{`.
|
|
||||||
//!
|
|
||||||
//! When that brace arrives, the lexer consumes the entire C++ block as
|
|
||||||
//! one token (`Token::Brace(BraceKind::CppBlock)`), tracking nested
|
|
||||||
//! braces, strings, and comments on the way. If the closing `}` is
|
|
||||||
//! missing, everything to EOF is treated as C++; downstream parsers must
|
|
||||||
//! handle that gracefully.
|
|
||||||
|
|
||||||
use logos::Lexer;
|
|
||||||
|
|
||||||
/// Which lexer mode we're in. See the module docs for the full story.
|
|
||||||
#[derive(Default, Clone, Copy, PartialEq, Eq)]
|
|
||||||
enum LexerMode {
|
|
||||||
/// Lexing regular UnrealScript.
|
|
||||||
#[default]
|
|
||||||
Normal,
|
|
||||||
/// Saw `cpptext`; waiting for the opening `{` of a C++ block.
|
|
||||||
AwaitingCppBlock,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extra per-lexer state. Currently just holds the [`Mode`].
|
|
||||||
///
|
|
||||||
/// This is a logos-specific implementation detail.
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct LexerState {
|
|
||||||
mode: LexerMode,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Are these braces "real" UnrealScript braces, or the start/end of a C++ block?
|
|
||||||
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
|
||||||
pub enum BraceKind {
|
|
||||||
Normal,
|
|
||||||
CppBlock,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// All UnrealScript tokens that our compiler distinguishes.
|
|
||||||
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
|
|
||||||
#[logos(extras = LexerState)]
|
|
||||||
pub enum Token {
|
|
||||||
// # Compiler/directive keywords
|
|
||||||
#[regex(r"(?i)#exec[^\r\n]*(\r|\n|\r\n)")]
|
|
||||||
ExecDirective,
|
|
||||||
#[regex("(?i)cpptext", |lex| { lex.extras.mode = LexerMode::AwaitingCppBlock; })]
|
|
||||||
CppText,
|
|
||||||
|
|
||||||
// # Declaration & structural keywords
|
|
||||||
#[regex("(?i)class")]
|
|
||||||
Class,
|
|
||||||
#[regex("(?i)struct")]
|
|
||||||
Struct,
|
|
||||||
#[regex("(?i)enum")]
|
|
||||||
Enum,
|
|
||||||
#[regex("(?i)state")]
|
|
||||||
State,
|
|
||||||
#[regex("(?i)function")]
|
|
||||||
Function,
|
|
||||||
#[regex("(?i)event")]
|
|
||||||
Event,
|
|
||||||
#[regex("(?i)delegate")]
|
|
||||||
Delegate,
|
|
||||||
#[regex("(?i)var")]
|
|
||||||
Var,
|
|
||||||
#[regex("(?i)local")]
|
|
||||||
Local,
|
|
||||||
|
|
||||||
// # Inheritance, interface, dependencies
|
|
||||||
#[regex("(?i)extends")]
|
|
||||||
Extends,
|
|
||||||
#[regex("(?i)dependson")]
|
|
||||||
DependsOn,
|
|
||||||
|
|
||||||
// # Access modifiers & properties
|
|
||||||
#[regex("(?i)private")]
|
|
||||||
Private,
|
|
||||||
#[regex("(?i)protected")]
|
|
||||||
Protected,
|
|
||||||
#[regex("(?i)public")]
|
|
||||||
Public,
|
|
||||||
#[regex("(?i)const")]
|
|
||||||
Const,
|
|
||||||
#[regex("(?i)static")]
|
|
||||||
Static,
|
|
||||||
#[regex("(?i)native")]
|
|
||||||
Native,
|
|
||||||
#[regex("(?i)abstract")]
|
|
||||||
Abstract,
|
|
||||||
#[regex("(?i)deprecated")]
|
|
||||||
Deprecated,
|
|
||||||
|
|
||||||
// # UnrealScript metadata/specifiers
|
|
||||||
#[regex("(?i)default")]
|
|
||||||
Default,
|
|
||||||
#[regex("(?i)defaultproperties")]
|
|
||||||
DefaultProperties,
|
|
||||||
#[regex("(?i)optional")]
|
|
||||||
Optional,
|
|
||||||
#[regex("(?i)config")]
|
|
||||||
Config,
|
|
||||||
#[regex("(?i)perobjectconfig")]
|
|
||||||
PerObjectConfig,
|
|
||||||
#[regex("(?i)globalconfig")]
|
|
||||||
GlobalConfig,
|
|
||||||
#[regex("(?i)collapsecategories")]
|
|
||||||
CollapseCategories,
|
|
||||||
#[regex("(?i)dontcollapsecategories")]
|
|
||||||
DontCollapseCategories,
|
|
||||||
#[regex("(?i)hidecategories")]
|
|
||||||
HideCategories,
|
|
||||||
#[regex("(?i)localized")]
|
|
||||||
Localized,
|
|
||||||
#[regex("(?i)placeable")]
|
|
||||||
Placeable,
|
|
||||||
#[regex("(?i)notplaceable")]
|
|
||||||
NotPlaceable,
|
|
||||||
#[regex("(?i)editinlinenew")]
|
|
||||||
EditInlineNew,
|
|
||||||
#[regex("(?i)noteditinlinenew")]
|
|
||||||
NotEditInlineNew,
|
|
||||||
#[regex("(?i)dynamicrecompile")]
|
|
||||||
DynamicRecompile,
|
|
||||||
#[regex("(?i)transient")]
|
|
||||||
Transient,
|
|
||||||
#[regex("(?i)operator")]
|
|
||||||
Operator,
|
|
||||||
#[regex("(?i)simulated")]
|
|
||||||
Simulated,
|
|
||||||
#[regex("(?i)latent")]
|
|
||||||
Latent,
|
|
||||||
#[regex("(?i)iterator")]
|
|
||||||
Iterator,
|
|
||||||
#[regex("(?i)out")]
|
|
||||||
Out,
|
|
||||||
#[regex("(?i)skip")]
|
|
||||||
Skip,
|
|
||||||
#[regex("(?i)singular")]
|
|
||||||
Singular,
|
|
||||||
#[regex("(?i)coerce")]
|
|
||||||
Coerce,
|
|
||||||
#[regex("(?i)assert")]
|
|
||||||
Assert,
|
|
||||||
#[regex("(?i)ignores")]
|
|
||||||
Ignores,
|
|
||||||
#[regex("(?i)within")]
|
|
||||||
Within,
|
|
||||||
#[regex("(?i)noexport")]
|
|
||||||
NoExport,
|
|
||||||
|
|
||||||
// # Replication-related
|
|
||||||
#[regex("(?i)reliable")]
|
|
||||||
Reliable,
|
|
||||||
#[regex("(?i)unreliable")]
|
|
||||||
Unreliable,
|
|
||||||
#[regex("(?i)replication")]
|
|
||||||
Replication,
|
|
||||||
#[regex("(?i)nativereplication")]
|
|
||||||
NativeReplication,
|
|
||||||
|
|
||||||
// # Control-flow keywords
|
|
||||||
#[regex("(?i)goto")]
|
|
||||||
Goto,
|
|
||||||
#[regex("(?i)if")]
|
|
||||||
If,
|
|
||||||
#[regex("(?i)else")]
|
|
||||||
Else,
|
|
||||||
#[regex("(?i)switch")]
|
|
||||||
Switch,
|
|
||||||
#[regex("(?i)case")]
|
|
||||||
Case,
|
|
||||||
#[regex("(?i)for")]
|
|
||||||
For,
|
|
||||||
#[regex("(?i)foreach")]
|
|
||||||
ForEach,
|
|
||||||
#[regex("(?i)while")]
|
|
||||||
While,
|
|
||||||
#[regex("(?i)do")]
|
|
||||||
Do,
|
|
||||||
#[regex("(?i)until")]
|
|
||||||
Until,
|
|
||||||
#[regex("(?i)break")]
|
|
||||||
Break,
|
|
||||||
#[regex("(?i)continue")]
|
|
||||||
Continue,
|
|
||||||
#[regex("(?i)return")]
|
|
||||||
Return,
|
|
||||||
|
|
||||||
// # Built-in types
|
|
||||||
#[regex("(?i)int")]
|
|
||||||
Int,
|
|
||||||
#[regex("(?i)float")]
|
|
||||||
Float,
|
|
||||||
#[regex("(?i)bool")]
|
|
||||||
Bool,
|
|
||||||
#[regex("(?i)byte")]
|
|
||||||
Byte,
|
|
||||||
#[regex("(?i)string")]
|
|
||||||
String,
|
|
||||||
#[regex("(?i)array")]
|
|
||||||
Array,
|
|
||||||
#[regex("(?i)name")]
|
|
||||||
Name,
|
|
||||||
|
|
||||||
// # Literals & identifiers
|
|
||||||
#[regex(r"0[xX][0-9A-Fa-f]+|[0-9]+")]
|
|
||||||
IntegerLiteral,
|
|
||||||
#[regex(r"[0-9]*\.[0-9]+([eE][+-]?[0-9]+)?")]
|
|
||||||
FloatLiteral,
|
|
||||||
#[regex(r#""([^"\\\r\n]|\\.)*""#)]
|
|
||||||
StringLiteral,
|
|
||||||
#[regex(r"'[a-zA-Z0-9_\. \-]*'")]
|
|
||||||
NameLiteral,
|
|
||||||
#[regex("(?i)true")]
|
|
||||||
True,
|
|
||||||
#[regex("(?i)false")]
|
|
||||||
False,
|
|
||||||
#[regex("(?i)none")]
|
|
||||||
None,
|
|
||||||
#[regex("(?i)self")]
|
|
||||||
SelfKeyword,
|
|
||||||
#[regex("(?i)new")]
|
|
||||||
New,
|
|
||||||
#[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
|
|
||||||
Identifier,
|
|
||||||
|
|
||||||
// # Operations
|
|
||||||
// ## Exponentiation
|
|
||||||
#[token("**")]
|
|
||||||
Exponentiation,
|
|
||||||
// ## Unary
|
|
||||||
#[token("++")]
|
|
||||||
Increment,
|
|
||||||
#[token("--")]
|
|
||||||
Decrement,
|
|
||||||
#[token("!")]
|
|
||||||
Not,
|
|
||||||
#[token("~")]
|
|
||||||
BitwiseNot,
|
|
||||||
// ## Vector
|
|
||||||
#[regex("(?i)dot")]
|
|
||||||
Dot,
|
|
||||||
#[regex("(?i)cross")]
|
|
||||||
Cross,
|
|
||||||
// ## Multiplicative
|
|
||||||
#[token("*")]
|
|
||||||
Multiply,
|
|
||||||
#[token("/")]
|
|
||||||
Divide,
|
|
||||||
#[token("%")]
|
|
||||||
Modulo,
|
|
||||||
// ## Additive
|
|
||||||
#[token("+")]
|
|
||||||
Plus,
|
|
||||||
#[token("-")]
|
|
||||||
Minus,
|
|
||||||
// ## String manipulation
|
|
||||||
#[token("@")]
|
|
||||||
ConcatSpace,
|
|
||||||
#[token("$")]
|
|
||||||
Concat,
|
|
||||||
// ## Shifts
|
|
||||||
#[token("<<")]
|
|
||||||
LeftShift,
|
|
||||||
#[token(">>>")]
|
|
||||||
LogicalRightShift,
|
|
||||||
#[token(">>")]
|
|
||||||
RightShift,
|
|
||||||
// ## Relational
|
|
||||||
#[token("<")]
|
|
||||||
Less,
|
|
||||||
#[token("<=")]
|
|
||||||
LessEqual,
|
|
||||||
#[token(">")]
|
|
||||||
Greater,
|
|
||||||
#[token(">=")]
|
|
||||||
GreaterEqual,
|
|
||||||
#[token("==")]
|
|
||||||
Equal,
|
|
||||||
#[token("!=")]
|
|
||||||
NotEqual,
|
|
||||||
#[token("~=")]
|
|
||||||
ApproximatelyEqual,
|
|
||||||
#[regex("(?i)clockwisefrom")]
|
|
||||||
ClockwiseFrom,
|
|
||||||
// ## Bitwise
|
|
||||||
#[token("&")]
|
|
||||||
BitwiseAnd,
|
|
||||||
#[token("|")]
|
|
||||||
BitwiseOr,
|
|
||||||
#[token("^")]
|
|
||||||
BitwiseXor,
|
|
||||||
// ## Logical
|
|
||||||
#[token("&&")]
|
|
||||||
And,
|
|
||||||
#[token("^^")]
|
|
||||||
Xor,
|
|
||||||
#[token("||")]
|
|
||||||
Or,
|
|
||||||
// ## Assigments
|
|
||||||
#[token("=")]
|
|
||||||
Assign,
|
|
||||||
#[token("*=")]
|
|
||||||
MultiplyAssign,
|
|
||||||
#[token("/=")]
|
|
||||||
DivideAssign,
|
|
||||||
#[token("%=")]
|
|
||||||
ModuloAssign,
|
|
||||||
#[token("+=")]
|
|
||||||
PlusAssign,
|
|
||||||
#[token("-=")]
|
|
||||||
MinusAssign,
|
|
||||||
#[token("$=")]
|
|
||||||
ConcatAssign,
|
|
||||||
#[token("@=")]
|
|
||||||
ConcatSpaceAssign,
|
|
||||||
|
|
||||||
// # Punctuation & delimiters
|
|
||||||
#[token("(")]
|
|
||||||
LeftParenthesis,
|
|
||||||
#[token(")")]
|
|
||||||
RightParenthesis,
|
|
||||||
#[token("{", handle_brace)]
|
|
||||||
Brace(BraceKind),
|
|
||||||
#[token("}")]
|
|
||||||
RightBrace,
|
|
||||||
#[token("[")]
|
|
||||||
LeftBracket,
|
|
||||||
#[token("]")]
|
|
||||||
RightBracket,
|
|
||||||
#[token(";")]
|
|
||||||
Semicolon,
|
|
||||||
#[token(",")]
|
|
||||||
Comma,
|
|
||||||
#[token(".")]
|
|
||||||
Period,
|
|
||||||
#[token(":")]
|
|
||||||
Colon,
|
|
||||||
#[token("#")]
|
|
||||||
Hash,
|
|
||||||
#[token("?")]
|
|
||||||
Question,
|
|
||||||
|
|
||||||
// # Comments & whitespaces
|
|
||||||
#[regex(r"//[^\r\n]*")]
|
|
||||||
LineComment,
|
|
||||||
#[regex(r"/\*", handle_block_comment)]
|
|
||||||
BlockComment,
|
|
||||||
#[regex(r"\r\n|\n|\r")]
|
|
||||||
Newline,
|
|
||||||
#[regex(r"[ \t]+")]
|
|
||||||
Whitespace,
|
|
||||||
|
|
||||||
// # Technical
|
|
||||||
Error,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Token {
|
|
||||||
/// Returns `true` if this token is a newline (`Token::NewLine`).
|
|
||||||
pub fn is_newline(&self) -> bool {
|
|
||||||
matches!(self, Token::Newline)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if this token is trivia whitespace
|
|
||||||
/// (`Token::Whitespace` or `Token::NewLine`).
|
|
||||||
///
|
|
||||||
/// Note: comments are **not** considered whitespace.
|
|
||||||
pub fn is_whitespace(&self) -> bool {
|
|
||||||
matches!(&self, Token::Whitespace | Token::Newline)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if this token may span multiple physical lines
|
|
||||||
/// (i.e. can contain newline characters).
|
|
||||||
pub fn can_span_lines(&self) -> bool {
|
|
||||||
matches!(
|
|
||||||
self,
|
|
||||||
Token::BlockComment | Token::Brace(BraceKind::CppBlock) | Token::Error
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if this token can appear in type position
|
|
||||||
/// (either a built-in type keyword or an identifier).
|
|
||||||
pub fn is_valid_type_name_token(&self) -> bool {
|
|
||||||
matches!(
|
|
||||||
self,
|
|
||||||
Token::Int
|
|
||||||
| Token::Float
|
|
||||||
| Token::Bool
|
|
||||||
| Token::Byte
|
|
||||||
| Token::String
|
|
||||||
| Token::Array
|
|
||||||
| Token::Name
|
|
||||||
| Token::Identifier
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume a /* ... */ block comment with arbitrary nesting
|
|
||||||
/// (like UnrealScript allows).
|
|
||||||
///
|
|
||||||
/// Matches the whole comment (delimiters included) or [`None`] if the file ends
|
|
||||||
/// before every `/*` is closed.
|
|
||||||
fn handle_block_comment(lexer: &mut Lexer<Token>) -> Option<()> {
|
|
||||||
let mut comment_depth = 1;
|
|
||||||
while let Some(next_char) = lexer.remainder().chars().next() {
|
|
||||||
if lexer.remainder().starts_with("/*") {
|
|
||||||
comment_depth += 1;
|
|
||||||
lexer.bump(2);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if lexer.remainder().starts_with("*/") {
|
|
||||||
comment_depth -= 1;
|
|
||||||
lexer.bump(2);
|
|
||||||
if comment_depth == 0 {
|
|
||||||
return Some(());
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
lexer.bump(next_char.len_utf8());
|
|
||||||
}
|
|
||||||
// Unterminated comment
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Called for every `{`.
|
|
||||||
///
|
|
||||||
/// This method either emits an opening brace or token for `cppblock`,
|
|
||||||
/// depending on lexer's current state.
|
|
||||||
fn handle_brace(lexer: &mut Lexer<Token>) -> Option<BraceKind> {
|
|
||||||
match lexer.extras.mode {
|
|
||||||
LexerMode::Normal => Some(BraceKind::Normal),
|
|
||||||
|
|
||||||
LexerMode::AwaitingCppBlock => {
|
|
||||||
lexer.extras.mode = LexerMode::Normal;
|
|
||||||
consume_cpp_block(lexer);
|
|
||||||
Some(BraceKind::CppBlock)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consumes a complete C++ block, handling:
|
|
||||||
/// - Nested `{...}` pairs
|
|
||||||
/// - String literals (`"..."` and `'...'`), including escaped quotes
|
|
||||||
/// - Line comments (`// ...\n`)
|
|
||||||
/// - Block comments (`/* ... */`)
|
|
||||||
///
|
|
||||||
/// Leaves the lexer positioned immediately after the closing `}` of the block.
|
|
||||||
/// The opening `{` must have already been consumed by the caller.
|
|
||||||
fn consume_cpp_block(lexer: &mut Lexer<Token>) {
|
|
||||||
let mut depth = 1;
|
|
||||||
while let Some(ch) = lexer.remainder().chars().next() {
|
|
||||||
match ch {
|
|
||||||
'{' => {
|
|
||||||
depth += 1;
|
|
||||||
lexer.bump(1);
|
|
||||||
}
|
|
||||||
'}' => {
|
|
||||||
depth -= 1;
|
|
||||||
lexer.bump(1);
|
|
||||||
if depth == 0 {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'/' if lexer.remainder().starts_with("/*") => {
|
|
||||||
lexer.bump(2); // consuming two-byte sequence `/*`
|
|
||||||
consume_c_comment(lexer)
|
|
||||||
}
|
|
||||||
'/' if lexer.remainder().starts_with("//") => {
|
|
||||||
lexer.bump(2); // consuming two-byte sequence `//`
|
|
||||||
while let Some(c) = lexer.remainder().chars().next() {
|
|
||||||
lexer.bump(c.len_utf8());
|
|
||||||
if c == '\n' {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'"' | '\'' => {
|
|
||||||
lexer.bump(1); // skip `'` or `"`
|
|
||||||
consume_string_literal(lexer, ch);
|
|
||||||
}
|
|
||||||
_ => lexer.bump(ch.len_utf8()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume over a C-style `/* ... */` comment (without nesting).
|
|
||||||
///
|
|
||||||
/// Assumes that opener `/*` is already consumed.
|
|
||||||
fn consume_c_comment(lexer: &mut Lexer<Token>) {
|
|
||||||
while let Some(next_character) = lexer.remainder().chars().next() {
|
|
||||||
if lexer.remainder().starts_with("*/") {
|
|
||||||
lexer.bump(2);
|
|
||||||
break;
|
|
||||||
} else {
|
|
||||||
lexer.bump(next_character.len_utf8());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Consume a string literal from C++ code.
|
|
||||||
///
|
|
||||||
/// Assumes that opening quotation mark is already consumed.
|
|
||||||
fn consume_string_literal(lexer: &mut Lexer<Token>, delimiter: char) {
|
|
||||||
while let Some(next_character) = lexer.remainder().chars().next() {
|
|
||||||
lexer.bump(next_character.len_utf8());
|
|
||||||
if next_character == '\\' {
|
|
||||||
// Skip the escaped character
|
|
||||||
if let Some(next) = lexer.remainder().chars().next() {
|
|
||||||
lexer.bump(next.len_utf8());
|
|
||||||
}
|
|
||||||
} else if next_character == delimiter {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -2,7 +2,8 @@
|
|||||||
//!
|
//!
|
||||||
//! Converts raw source text into a lossless, position-aware stream of lexical
|
//! Converts raw source text into a lossless, position-aware stream of lexical
|
||||||
//! [`Token`]s, grouped *per physical line*, and returns it as
|
//! [`Token`]s, grouped *per physical line*, and returns it as
|
||||||
//! a [`TokenizedFile`].
|
//! a [`TokenizedFile`]. A trailing newline terminates the last physical line
|
||||||
|
//! rather than introducing an additional empty line.
|
||||||
//!
|
//!
|
||||||
//! Design goals:
|
//! Design goals:
|
||||||
//!
|
//!
|
||||||
@ -12,191 +13,184 @@
|
|||||||
//! precompute lengths of each token in that encoding, making interfacing
|
//! precompute lengths of each token in that encoding, making interfacing
|
||||||
//! easier.
|
//! easier.
|
||||||
//!
|
//!
|
||||||
//! ## Iteration over tokens
|
|
||||||
//!
|
|
||||||
//! For simplicity we've moved out code for iterating over tokens of
|
|
||||||
//! [`TokenizedFile`] into a separate submodule [`iterator`].
|
|
||||||
//!
|
|
||||||
//! ## Opt-in debug helpers
|
//! ## Opt-in debug helpers
|
||||||
//!
|
//!
|
||||||
//! Extra diagnostics become available in **debug builds** or when the crate is
|
//! Extra diagnostics become available in **debug builds** or when the crate is
|
||||||
//! compiled with `debug` feature enabled. They live in the [`debug_tools`]
|
//! compiled with `debug` feature enabled. They live in the [`debug_tools`]
|
||||||
//! extension trait, implemented for [`TokenizedFile`].
|
//! extension trait, implemented for [`TokenizedFile`].
|
||||||
//!
|
|
||||||
//! ```rust
|
|
||||||
//! // bring the trait into scope
|
|
||||||
//! use lexer::DebugTools;
|
|
||||||
//!
|
|
||||||
//! let file = TokenizedFile::from_str("local int myValue;");
|
|
||||||
//! file.debug_dump(); // pretty-print token layout
|
|
||||||
//! let text = file.to_source(); // reconstruct original text
|
|
||||||
//! ```
|
|
||||||
|
|
||||||
mod debug_tools;
|
mod queries;
|
||||||
mod iterator;
|
mod raw_lexer;
|
||||||
mod lexing;
|
#[cfg(test)]
|
||||||
|
mod tests;
|
||||||
|
mod token;
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
|
|
||||||
use logos::Logos;
|
use logos::Logos;
|
||||||
|
|
||||||
#[cfg(any(debug_assertions, feature = "debug"))]
|
use raw_lexer::RawToken;
|
||||||
pub use debug_tools::DebugTools;
|
|
||||||
pub use iterator::Tokens;
|
pub use raw_lexer::BraceKind;
|
||||||
pub use lexing::{BraceKind, Token};
|
pub use token::Keyword;
|
||||||
|
pub use token::Token;
|
||||||
|
|
||||||
/// Empirically chosen starting size for token buffer (used during tokenization)
|
/// Empirically chosen starting size for token buffer (used during tokenization)
|
||||||
/// that provides good performance.
|
/// that provides good performance.
|
||||||
const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000;
|
const DEFAULT_TOKEN_BUFFER_CAPACITY: usize = 20_000;
|
||||||
|
|
||||||
/// A slice tagged with its token kind plus two length counters.
|
// TODO: check this!!!
|
||||||
|
/// Visible fragment of a token on one physical line.
|
||||||
///
|
///
|
||||||
/// *No absolute coordinates* are stored - they are recomputed per line.
|
/// `columns` is an end-exclusive range inside the string returned by
|
||||||
#[derive(Debug, Hash, Clone, Copy, PartialEq, Eq)]
|
/// [`TokenizedFile::line_text`] for that line.
|
||||||
pub struct TokenPiece<'src> {
|
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||||
/// Token, represented by this [`TokenPiece`].
|
pub struct VisibleLineSpan {
|
||||||
|
pub line: usize,
|
||||||
|
pub columns: std::ops::Range<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A token together with its source text and precomputed UTF-16 length.
|
||||||
|
///
|
||||||
|
/// It does not store an absolute file position.
|
||||||
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
|
pub struct TokenData<'src> {
|
||||||
|
/// Kind of token that was lexed.
|
||||||
pub token: Token,
|
pub token: Token,
|
||||||
/// Underlying text that was lexed as the corresponding token.
|
/// Underlying text that was lexed as the corresponding token.
|
||||||
pub lexeme: &'src str,
|
pub lexeme: &'src str,
|
||||||
/// Length of the token in UTF-16 code units for the needs of easy seeking
|
/// Length of the token in UTF-16 code units for the needs of easy seeking
|
||||||
/// using given LSP cursor coordinates (line + UTF-16 offset).
|
/// using given LSP cursor coordinates (line + UTF-16 offset).
|
||||||
/// Precomputed for convenience.
|
/// Precomputed for convenience.
|
||||||
pub length_utf16: usize,
|
pub utf16_length: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Defines location of a token inside [`TokenizedFile`] in a form convenient
|
/// 0-based index of a token within the file-wide token buffer.
|
||||||
/// for communicating through LSP.
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
|
||||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
pub struct TokenPosition(pub usize);
|
||||||
pub enum TokenLocation {
|
|
||||||
/// Actual position of some token in the file.
|
|
||||||
Position {
|
|
||||||
/// 0-based line number.
|
|
||||||
line: usize,
|
|
||||||
/// 0-based index of a token in the line, possibly including the token that
|
|
||||||
/// has continued from the previous line.
|
|
||||||
///
|
|
||||||
/// Columns count tokens, not bytes or chars.
|
|
||||||
column: usize,
|
|
||||||
},
|
|
||||||
/// Position af the end-of-file.
|
|
||||||
EndOfFile,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A tokenized, lossless representation of an UnrealScript source file.
|
/// A tokenized, lossless representation of an `UnrealScript` source file.
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct TokenizedFile<'src> {
|
pub struct TokenizedFile<'src> {
|
||||||
/// Arena of every token span in this file.
|
/// Arena of every token span in this file.
|
||||||
buffer: Vec<TokenPiece<'src>>,
|
buffer: Vec<TokenData<'src>>,
|
||||||
/// Mapping that provides an easy and efficient access to tokens by
|
/// Mapping from physical line number to the tokens that belong to it.
|
||||||
/// line number.
|
|
||||||
lines: Vec<Line>,
|
lines: Vec<Line>,
|
||||||
|
/// Mapping token index to ranges of bytes that correspond to
|
||||||
|
/// visible characters (i.e. all non line terminators) in its lines.
|
||||||
|
///
|
||||||
|
/// Records only exists for multiline tokens and ranges can be empty for
|
||||||
|
/// lines that only contain line break boundary.
|
||||||
|
multi_line_map: HashMap<BufferIndex, Vec<VisibleByteRange>>,
|
||||||
/// Simple flag for marking erroneous state.
|
/// Simple flag for marking erroneous state.
|
||||||
had_errors: bool,
|
had_errors: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mutable state that encapsulates data needed during the tokenization loop.
|
/// An immutable iterator over all tokens in a [`TokenizedFile`], preserving
|
||||||
|
/// their order of appearance in the original source file.
|
||||||
///
|
///
|
||||||
/// Access to stored tokens is provided through the [`iterator::Tokens`]
|
/// After exhaustion it keeps returning [`None`].
|
||||||
/// iterator.
|
#[must_use]
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct Tokens<'file, 'src> {
|
||||||
|
/// Position of the next token to be returned in the canonical file-wide
|
||||||
|
/// token arena.
|
||||||
|
cursor: TokenPosition,
|
||||||
|
/// [`TokenizedFile`] whose tokens we're iterating over.
|
||||||
|
source_file: &'file TokenizedFile<'src>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Type for referring to line numbers.
|
||||||
|
type LineNumber = usize;
|
||||||
|
|
||||||
|
/// Type for specific tokens inside each [`Line`].
|
||||||
|
type BufferIndex = usize;
|
||||||
|
|
||||||
|
/// Type for describing sub-range of visible characters of a single line for
|
||||||
|
/// some token.
|
||||||
|
type VisibleByteRange = Range<usize>;
|
||||||
|
|
||||||
|
/// Representation of a single physical line of the source file.
|
||||||
|
///
|
||||||
|
/// Uses ranges instead of slices to avoid a self-referential relationship
|
||||||
|
/// with [`TokenizedFile`], which Rust forbids.
|
||||||
|
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
||||||
|
struct Line {
|
||||||
|
/// Token that began on an earlier line (`None` for standalone lines).
|
||||||
|
continued_from: Option<LineNumber>,
|
||||||
|
/// Contiguous tokens that started on this line (`start >= end` iff empty).
|
||||||
|
local_range: Range<BufferIndex>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutable state used while tokenizing a source file.
|
||||||
|
#[derive(Debug)]
|
||||||
struct Tokenizer<'src> {
|
struct Tokenizer<'src> {
|
||||||
/// Arena that owns every [`TokenPiece`] produced for the file.
|
/// Arena that owns every [`TokenData`] produced for the file.
|
||||||
buffer: Vec<TokenPiece<'src>>,
|
buffer: Vec<TokenData<'src>>,
|
||||||
/// Mapping from physical line number to the tokens that belong to it.
|
/// Mapping from physical line number to the tokens that belong to it.
|
||||||
lines: Vec<Line>,
|
lines: Vec<Line>,
|
||||||
/// The current 0-based physical line number.
|
/// Mapping token index to ranges of bytes that correspond to
|
||||||
|
/// visible characters in its lines.
|
||||||
|
multi_line_map: HashMap<BufferIndex, Vec<VisibleByteRange>>,
|
||||||
|
/// The 0-based physical line number that is currently being scanned.
|
||||||
line_number: usize,
|
line_number: usize,
|
||||||
/// Index in [`Tokenizer::buffer`] where the current *line* starts.
|
/// Points to the first token (index in [`Tokenizer::buffer`]) not yet
|
||||||
slice_start_index: usize,
|
/// committed to `lines`, e.g. where the current *line* starts.
|
||||||
|
uncommitted_start_index: usize,
|
||||||
/// When a multi-line token is being scanned, stores the 0-based line
|
/// When a multi-line token is being scanned, stores the 0-based line
|
||||||
/// on which it started; [`None`] otherwise.
|
/// on which it started; [`None`] otherwise.
|
||||||
///
|
///
|
||||||
/// `Some(line_idx)` iff the current line is within a multi-line token that
|
/// `Some(line_number)` iff the current line is within a multi-line token
|
||||||
/// started on `line_idx`; it is consumed exactly once by
|
/// that started on `line_number`; it is consumed exactly once by
|
||||||
/// [`Self::commit_current_line`].
|
/// [`Self::commit_current_line`].
|
||||||
multi_line_start: Option<usize>,
|
multi_line_start_line: Option<LineNumber>,
|
||||||
/// Set to [`true`] if the lexer reported any error tokens.
|
/// Set to `true` if the lexer reported any error tokens.
|
||||||
had_errors: bool,
|
had_errors: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> TokenizedFile<'src> {
|
impl<'src> TokenizedFile<'src> {
|
||||||
/// Tokenize `source` and return a fresh [`TokenizedFile`].
|
/// Tokenizes `source` and returns a fresh [`TokenizedFile`].
|
||||||
///
|
///
|
||||||
/// ## Examples
|
/// Its output is lossless and groups resulting tokens by physical lines.
|
||||||
///
|
/// Error spans are preserved as [`Token::Error`].
|
||||||
/// ```rust
|
|
||||||
/// let source_text = "2 + 2 * 2".to_string();
|
|
||||||
/// let tokenized_file = TokenizedFile::from_str(&source_text);
|
|
||||||
/// ```
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn from_str(source: &'src str) -> TokenizedFile<'src> {
|
pub fn tokenize(source: &'src str) -> Self {
|
||||||
let mut tokenizer = Self::builder();
|
let mut tokenizer = Tokenizer::new();
|
||||||
let mut lexer = Token::lexer(source);
|
let mut lexer = RawToken::lexer(source);
|
||||||
|
|
||||||
while let Some(token_result) = lexer.next() {
|
while let Some(token_result) = lexer.next() {
|
||||||
// Add `Token:Error` manually, since Logos won't do it for us.
|
// Add `Token::Error` manually, since Logos won't do it for us.
|
||||||
let token = token_result.unwrap_or_else(|_| {
|
let token = token_result.unwrap_or_else(|()| {
|
||||||
tokenizer.had_errors = true;
|
tokenizer.had_errors = true;
|
||||||
Token::Error
|
RawToken::Error
|
||||||
});
|
});
|
||||||
let token_piece = make_token_piece(token, lexer.slice());
|
let token_piece = make_token_data(Token::from(token), lexer.slice());
|
||||||
tokenizer.process_token_piece(token_piece);
|
tokenizer.process_token_piece(token_piece);
|
||||||
}
|
}
|
||||||
tokenizer.into_tokenized_file()
|
tokenizer.into_tokenized_file()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns [`true`] if any erroneous tokens were produced during building
|
/// Returns `true` if tokenization produced any error tokens.
|
||||||
/// of this [`TokenizedFile`].
|
#[must_use]
|
||||||
///
|
pub const fn has_errors(&self) -> bool {
|
||||||
/// ## Examples
|
|
||||||
///
|
|
||||||
/// ```rust
|
|
||||||
/// let tokenized_file = TokenizedFile::from_str("function test() {}");
|
|
||||||
/// if tokenized_file.has_errors() {
|
|
||||||
/// println!("Error while parsing file.");
|
|
||||||
/// }
|
|
||||||
/// ```
|
|
||||||
#[inline]
|
|
||||||
pub fn has_errors(&self) -> bool {
|
|
||||||
self.had_errors
|
self.had_errors
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create an empty tokenizer state with tuned buffer capacity.
|
/// Returns an iterator over all contained tokens in the order they appear
|
||||||
fn builder() -> Tokenizer<'src> {
|
/// in the original source file.
|
||||||
Tokenizer {
|
///
|
||||||
buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY),
|
/// Returns pairs of position and token data: `(TokenPosition, TokenData)`.
|
||||||
lines: Vec::new(),
|
pub const fn iter(&self) -> Tokens<'_, 'src> {
|
||||||
line_number: 0,
|
Tokens::new(self)
|
||||||
slice_start_index: 0,
|
|
||||||
multi_line_start: None,
|
|
||||||
had_errors: false,
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Type for indexing lines in a [`TokenizedFile`].
|
|
||||||
type LineIdx = usize;
|
|
||||||
|
|
||||||
/// Type for specific tokens inside each [`Line`].
|
|
||||||
type TokenIdx = usize;
|
|
||||||
|
|
||||||
/// Representation of a single physical line of the source file.
|
|
||||||
///
|
|
||||||
/// [`Range<TokenIdx>`] are used instead of slices to avoid creating
|
|
||||||
/// a self-referential struct (with [`TokenizedFile`]), which rust forbids.
|
|
||||||
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
|
|
||||||
struct Line {
|
|
||||||
/// Token that began on an earlier line (`None` for standalone lines).
|
|
||||||
continued_from: Option<LineIdx>,
|
|
||||||
/// Contiguous tokens that started on this line (`start >= end` iff empty).
|
|
||||||
local_range: Range<TokenIdx>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Line {
|
impl Line {
|
||||||
/// Creates a standalone line that owns a contiguous slice in
|
/// Creates a standalone line that owns a contiguous slice in
|
||||||
/// the [`TokenizedFile::buffer`] arena.
|
/// the [`TokenizedFile::buffer`] arena.
|
||||||
#[inline]
|
const fn standalone(locals: Range<BufferIndex>) -> Self {
|
||||||
fn standalone(locals: Range<TokenIdx>) -> Line {
|
Self {
|
||||||
Line {
|
|
||||||
continued_from: None,
|
continued_from: None,
|
||||||
local_range: locals,
|
local_range: locals,
|
||||||
}
|
}
|
||||||
@ -204,9 +198,8 @@ impl Line {
|
|||||||
|
|
||||||
/// Creates a line that is part of a multi-line token started on
|
/// Creates a line that is part of a multi-line token started on
|
||||||
/// another line, referencing the 0-based index of its origin.
|
/// another line, referencing the 0-based index of its origin.
|
||||||
#[inline]
|
const fn continued(carried: LineNumber) -> Self {
|
||||||
fn spanned(carried: LineIdx) -> Line {
|
Self {
|
||||||
Line {
|
|
||||||
continued_from: Some(carried),
|
continued_from: Some(carried),
|
||||||
local_range: 0..0,
|
local_range: 0..0,
|
||||||
}
|
}
|
||||||
@ -214,9 +207,8 @@ impl Line {
|
|||||||
|
|
||||||
/// Creates a line that is part of a multi-line token started on
|
/// Creates a line that is part of a multi-line token started on
|
||||||
/// another line and also contains additional tokens local to itself.
|
/// another line and also contains additional tokens local to itself.
|
||||||
#[inline]
|
const fn continued_with_tokens(carried: LineNumber, locals: Range<BufferIndex>) -> Self {
|
||||||
fn spanned_with_tokens(carried: LineIdx, locals: Range<TokenIdx>) -> Line {
|
Self {
|
||||||
Line {
|
|
||||||
continued_from: Some(carried),
|
continued_from: Some(carried),
|
||||||
local_range: locals,
|
local_range: locals,
|
||||||
}
|
}
|
||||||
@ -227,29 +219,31 @@ impl Line {
|
|||||||
///
|
///
|
||||||
/// [`None`] means there are no such tokens. Otherwise range is guaranteed
|
/// [`None`] means there are no such tokens. Otherwise range is guaranteed
|
||||||
/// to not be empty.
|
/// to not be empty.
|
||||||
#[inline]
|
fn local_range(&self) -> Option<Range<BufferIndex>> {
|
||||||
fn local_range(&self) -> Option<Range<TokenIdx>> {
|
|
||||||
if self.local_range.is_empty() {
|
if self.local_range.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
Some(self.local_range.clone())
|
Some(self.local_range.clone())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of tokens on this line.
|
|
||||||
///
|
|
||||||
/// Counts both tokens that started on this line and tokens that continued
|
|
||||||
/// from previous one.
|
|
||||||
#[inline]
|
|
||||||
fn len(&self) -> usize {
|
|
||||||
(if self.continued_from.is_some() { 1 } else { 0 })
|
|
||||||
+ (self.local_range.end - self.local_range.start)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> Tokenizer<'src> {
|
impl<'src> Tokenizer<'src> {
|
||||||
|
/// Returns an empty tokenizer state.
|
||||||
|
fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
buffer: Vec::with_capacity(DEFAULT_TOKEN_BUFFER_CAPACITY),
|
||||||
|
lines: Vec::new(),
|
||||||
|
multi_line_map: HashMap::new(),
|
||||||
|
line_number: 0,
|
||||||
|
uncommitted_start_index: 0,
|
||||||
|
multi_line_start_line: None,
|
||||||
|
had_errors: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Handles a token span and dispatches to the appropriate handler.
|
/// Handles a token span and dispatches to the appropriate handler.
|
||||||
fn process_token_piece(&mut self, token_piece: TokenPiece<'src>) {
|
fn process_token_piece(&mut self, token_piece: TokenData<'src>) {
|
||||||
if token_piece.token.can_span_lines() {
|
if token_piece.token.can_span_lines() {
|
||||||
self.process_multi_line_token(token_piece);
|
self.process_multi_line_token(token_piece);
|
||||||
} else {
|
} else {
|
||||||
@ -259,7 +253,7 @@ impl<'src> Tokenizer<'src> {
|
|||||||
|
|
||||||
/// Handles simple tokens that *never* span multiple lines, allowing us to
|
/// Handles simple tokens that *never* span multiple lines, allowing us to
|
||||||
/// skip a lot of work.
|
/// skip a lot of work.
|
||||||
fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
|
fn process_single_line_token(&mut self, token_piece: TokenData<'src>) {
|
||||||
if token_piece.token.is_newline() {
|
if token_piece.token.is_newline() {
|
||||||
self.line_number += 1;
|
self.line_number += 1;
|
||||||
self.buffer.push(token_piece);
|
self.buffer.push(token_piece);
|
||||||
@ -270,34 +264,40 @@ impl<'src> Tokenizer<'src> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Handles tokens that might contain one or more newline characters.
|
/// Handles tokens that might contain one or more newline characters.
|
||||||
fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
|
fn process_multi_line_token(&mut self, token_piece: TokenData<'src>) {
|
||||||
let start_line = self.line_number;
|
let start_line = self.line_number;
|
||||||
let newline_count = count_line_breaks(token_piece.lexeme);
|
let line_break_map = split_visible_line_segments(token_piece.lexeme);
|
||||||
|
let newline_count = line_break_map.len().saturating_sub(1);
|
||||||
|
|
||||||
// Did this token end in a newline?
|
// Needed for unterminated multi-line error tokens that reach EOF right
|
||||||
// This can happen if this is an `Error` token that ends the file.
|
// after a line break.
|
||||||
let ends_with_newline =
|
let ends_with_newline =
|
||||||
token_piece.lexeme.ends_with('\n') || token_piece.lexeme.ends_with('\r');
|
token_piece.lexeme.ends_with('\n') || token_piece.lexeme.ends_with('\r');
|
||||||
|
|
||||||
|
let multi_line_token_index = self.buffer.len();
|
||||||
self.buffer.push(token_piece);
|
self.buffer.push(token_piece);
|
||||||
// We only need to commit the line if this token actually ended the line
|
if !line_break_map.is_empty() {
|
||||||
|
self.multi_line_map
|
||||||
|
.insert(multi_line_token_index, line_break_map);
|
||||||
|
}
|
||||||
|
// A line is committed only once the token stream has actually crossed
|
||||||
|
// a physical line boundary.
|
||||||
if newline_count > 0 {
|
if newline_count > 0 {
|
||||||
|
// This clears `multi_line_start_line`
|
||||||
self.commit_current_line();
|
self.commit_current_line();
|
||||||
// We only need to insert one `Line::spanned(start_line)` per
|
// We only need to insert one `Line::continued(start_line)` per
|
||||||
// *interior* line:
|
// *interior* line:
|
||||||
//
|
//
|
||||||
// standalone | local int i = /* Now we start long comment
|
// standalone | local int i = /* Now we start long comment
|
||||||
// spanned | with three line breaks and *exactly* two
|
// continued | with three line breaks and *exactly* two
|
||||||
// spanned | inner lines that contain nothing but
|
// continued | inner lines that contain nothing but
|
||||||
// spanned_with_tokens | comment bytes! */ = 0;
|
// continued_with_tokens| comment bytes! */ = 0;
|
||||||
let inner_lines_count = newline_count - 1;
|
let inner_lines_count = newline_count - 1;
|
||||||
for _ in 0..inner_lines_count {
|
for _ in 0..inner_lines_count {
|
||||||
self.lines.push(Line::spanned(start_line));
|
self.lines.push(Line::continued(start_line));
|
||||||
}
|
}
|
||||||
// This is called *after* `commit_current_line()` cleared previous
|
self.multi_line_start_line = if ends_with_newline {
|
||||||
// stored value
|
None
|
||||||
self.multi_line_start = if ends_with_newline {
|
|
||||||
None // we're done at this point
|
|
||||||
} else {
|
} else {
|
||||||
Some(start_line)
|
Some(start_line)
|
||||||
};
|
};
|
||||||
@ -309,32 +309,34 @@ impl<'src> Tokenizer<'src> {
|
|||||||
/// Commits the tokens of the current physical line into `self.lines`.
|
/// Commits the tokens of the current physical line into `self.lines`.
|
||||||
fn commit_current_line(&mut self) {
|
fn commit_current_line(&mut self) {
|
||||||
let slice_end = self.buffer.len();
|
let slice_end = self.buffer.len();
|
||||||
if slice_end > self.slice_start_index {
|
// A trailing newline terminates the current physical line rather than
|
||||||
let slice = self.slice_start_index..slice_end;
|
// creating an additional empty line entry.
|
||||||
|
if slice_end > self.uncommitted_start_index {
|
||||||
|
let slice = self.uncommitted_start_index..slice_end;
|
||||||
|
|
||||||
// If we were in the middle of a multi-line token, we
|
// If we were in the middle of a multi-line token, we
|
||||||
// *always* consume `multi_line_start` here, ensuring that each call
|
// *always* consume `multi_line_start` here, ensuring that each call
|
||||||
// to `commit_current_line()` only applies it once.
|
// to `commit_current_line()` only applies it once.
|
||||||
// This guarantees no "bleed" between adjacent multi-line tokens.
|
// This guarantees no "bleed" between adjacent multi-line tokens.
|
||||||
if let Some(from) = self.multi_line_start.take() {
|
if let Some(from) = self.multi_line_start_line.take() {
|
||||||
self.lines.push(Line::spanned_with_tokens(from, slice));
|
self.lines.push(Line::continued_with_tokens(from, slice));
|
||||||
} else {
|
} else {
|
||||||
self.lines.push(Line::standalone(slice));
|
self.lines.push(Line::standalone(slice));
|
||||||
}
|
}
|
||||||
self.slice_start_index = slice_end;
|
self.uncommitted_start_index = slice_end;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Finishes tokenization, converting accumulated data into
|
/// Finishes tokenization, converting accumulated data into
|
||||||
/// [`TokenizedFile`].
|
/// [`TokenizedFile`].
|
||||||
fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
|
fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
|
||||||
// Flush trailing tokens for which `commit` wasn't auto triggered
|
// Commits the final line when the file does not end with a newline.
|
||||||
self.commit_current_line();
|
self.commit_current_line();
|
||||||
// If we still have a `multi_line_start`
|
// If we still have a `multi_line_start`
|
||||||
// (i.e. a pure multi-line token with no local tokens on its last line),
|
// (i.e. a pure multi-line token with no local tokens on its last line),
|
||||||
// push a bare `Line::spanned` entry.
|
// push a bare `Line::continued` entry.
|
||||||
if let Some(from) = self.multi_line_start.take() {
|
if let Some(from) = self.multi_line_start_line.take() {
|
||||||
self.lines.push(Line::spanned(from));
|
self.lines.push(Line::continued(from));
|
||||||
}
|
}
|
||||||
|
|
||||||
self.buffer.shrink_to_fit();
|
self.buffer.shrink_to_fit();
|
||||||
@ -343,40 +345,105 @@ impl<'src> Tokenizer<'src> {
|
|||||||
TokenizedFile {
|
TokenizedFile {
|
||||||
buffer: self.buffer,
|
buffer: self.buffer,
|
||||||
lines: self.lines,
|
lines: self.lines,
|
||||||
|
multi_line_map: self.multi_line_map,
|
||||||
had_errors: self.had_errors,
|
had_errors: self.had_errors,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
|
fn make_token_data(token: Token, text: &str) -> TokenData<'_> {
|
||||||
let length_utf16 = text.encode_utf16().count();
|
let length_utf16 = text.encode_utf16().count();
|
||||||
TokenPiece {
|
TokenData {
|
||||||
lexeme: text,
|
lexeme: text,
|
||||||
token,
|
token,
|
||||||
length_utf16,
|
utf16_length: length_utf16,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Counts the number of newlines in given text.
|
/// Returns byte ranges of visible text characters for each physical line
|
||||||
fn count_line_breaks(text: &str) -> usize {
|
/// spanned by `text`.
|
||||||
let mut bytes_iterator = text.as_bytes().iter().peekable();
|
///
|
||||||
let mut newline_count = 0;
|
/// Returns an empty vector if `text` contains no line breaks.
|
||||||
while let Some(&next_byte) = bytes_iterator.next() {
|
fn split_visible_line_segments(text: &str) -> Vec<Range<usize>> {
|
||||||
|
let bytes = text.as_bytes();
|
||||||
|
let mut segments = Vec::new();
|
||||||
|
|
||||||
|
let mut segment_start = 0usize;
|
||||||
|
let mut saw_line_break = false;
|
||||||
|
let mut bytes_iterator = bytes.iter().enumerate().peekable();
|
||||||
|
while let Some((next_byte_index, &next_byte)) = bytes_iterator.next() {
|
||||||
// Logos' regex rule is "\r\n|\n|\r", so we agree with it on new line
|
// Logos' regex rule is "\r\n|\n|\r", so we agree with it on new line
|
||||||
// character treatment
|
// character treatment
|
||||||
match next_byte {
|
match next_byte {
|
||||||
b'\r' => {
|
b'\r' => {
|
||||||
newline_count += 1;
|
saw_line_break = true;
|
||||||
if let Some(&&b'\n') = bytes_iterator.peek() {
|
let visible_end = next_byte_index;
|
||||||
// skip the '\n' in a CRLF
|
let next_start =
|
||||||
bytes_iterator.next();
|
if let Some((next_line_break_index, b'\n')) = bytes_iterator.peek().copied() {
|
||||||
}
|
bytes_iterator.next(); // consume '\n' of `\r\n`
|
||||||
|
next_line_break_index + 1
|
||||||
|
} else {
|
||||||
|
next_byte_index + 1
|
||||||
|
};
|
||||||
|
segments.push(segment_start..visible_end);
|
||||||
|
segment_start = next_start;
|
||||||
}
|
}
|
||||||
b'\n' => {
|
b'\n' => {
|
||||||
newline_count += 1;
|
saw_line_break = true;
|
||||||
|
let visible_end = next_byte_index;
|
||||||
|
segments.push(segment_start..visible_end);
|
||||||
|
segment_start = next_byte_index + 1;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
newline_count
|
// If the token contained at least one line break, include the visible
|
||||||
|
// segment of its final physical line as well. This may be empty, e.g.
|
||||||
|
// for text ending with '\n' or '\r\n'.
|
||||||
|
if saw_line_break {
|
||||||
|
segments.push(segment_start..bytes.len());
|
||||||
|
}
|
||||||
|
segments
|
||||||
|
}
|
||||||
|
|
||||||
|
// Because once `cursor` moves past the end of `buffer`, it can never become
|
||||||
|
// valid again.
|
||||||
|
impl std::iter::FusedIterator for Tokens<'_, '_> {}
|
||||||
|
|
||||||
|
impl<'file, 'src> Tokens<'file, 'src> {
|
||||||
|
/// Advances the iterator cursor by one token.
|
||||||
|
const fn advance(&mut self) {
|
||||||
|
self.cursor.0 += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new iterator.
|
||||||
|
const fn new(source_file: &'file TokenizedFile<'src>) -> Self {
|
||||||
|
Self {
|
||||||
|
source_file,
|
||||||
|
cursor: TokenPosition(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src> Iterator for Tokens<'_, 'src> {
|
||||||
|
type Item = (TokenPosition, TokenData<'src>);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
if let Some(&token_piece) = self.source_file.buffer.get(self.cursor.0) {
|
||||||
|
let position = self.cursor;
|
||||||
|
self.advance();
|
||||||
|
Some((position, token_piece))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'file, 'src> IntoIterator for &'file TokenizedFile<'src> {
|
||||||
|
type Item = (TokenPosition, TokenData<'src>);
|
||||||
|
type IntoIter = Tokens<'file, 'src>;
|
||||||
|
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
self.iter()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
291
rottlib/src/lexer/queries.rs
Normal file
291
rottlib/src/lexer/queries.rs
Normal file
@ -0,0 +1,291 @@
|
|||||||
|
//! # Query helpers
|
||||||
|
//!
|
||||||
|
//! Read-only convenience APIs for inspecting a [`TokenizedFile`] without
|
||||||
|
//! exposing its internal representation.
|
||||||
|
|
||||||
|
use crate::lexer::{Line, TokenData, TokenPosition, TokenizedFile, VisibleLineSpan};
|
||||||
|
|
||||||
|
impl<'src> TokenizedFile<'src> {
|
||||||
|
/// Returns the number of physical lines stored in this file.
|
||||||
|
///
|
||||||
|
/// Empty line after the trailing newline sequence isn't counted as a line
|
||||||
|
/// by this method.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn line_count(&self) -> usize {
|
||||||
|
self.lines.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an iterator over tokens that *start* on physical line
|
||||||
|
/// `line_number`.
|
||||||
|
///
|
||||||
|
/// The yielded items are `(TokenPosition, TokenData)` pairs, matching the
|
||||||
|
/// canonical file-wide token arena.
|
||||||
|
///
|
||||||
|
/// If the line ends with a newline token, that newline token is included.
|
||||||
|
///
|
||||||
|
/// If the line begins with a carried fragment of a multi-line token that
|
||||||
|
/// started on an earlier line, that fragment is **not** yielded here.
|
||||||
|
/// Use [`TokenizedFile::line_text`] to reconstruct the visible content of
|
||||||
|
/// the full line.
|
||||||
|
///
|
||||||
|
/// If `line_number` is out of bounds, the returned iterator is empty.
|
||||||
|
#[must_use]
|
||||||
|
pub fn line_tokens(
|
||||||
|
&self,
|
||||||
|
line_number: usize,
|
||||||
|
) -> std::vec::IntoIter<(TokenPosition, TokenData<'src>)> {
|
||||||
|
let Some(line) = self.lines.get(line_number) else {
|
||||||
|
return Vec::new().into_iter();
|
||||||
|
};
|
||||||
|
let Some(local_range) = line.local_range() else {
|
||||||
|
return Vec::new().into_iter();
|
||||||
|
};
|
||||||
|
let mut out = Vec::with_capacity(local_range.len());
|
||||||
|
for buffer_index in local_range {
|
||||||
|
// Invariant:
|
||||||
|
// `Line::local_range()` is always constructed from contiguous
|
||||||
|
// slices of `self.buffer` during tokenization, so every index in
|
||||||
|
// this range must be valid for `self.buffer`.
|
||||||
|
let token_data = self.buffer[buffer_index];
|
||||||
|
out.push((TokenPosition(buffer_index), token_data));
|
||||||
|
}
|
||||||
|
out.into_iter()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the token stored at `position`, if that position is valid.
|
||||||
|
///
|
||||||
|
/// This is a direct lookup into the file-wide token buffer.
|
||||||
|
#[must_use]
|
||||||
|
pub fn token_at(&self, position: TokenPosition) -> Option<TokenData<'src>> {
|
||||||
|
self.buffer.get(position.0).copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Reconstructs the visible text of physical line `line_index`.
    ///
    /// The returned string does **not** include a trailing line terminator.
    ///
    /// Unlike [`TokenizedFile::line_tokens`], this method includes the visible
    /// fragment of a multi-line token carried from an earlier line.
    ///
    /// Returns [`None`] iff `line_index >= self.line_count()`.
    #[must_use]
    pub fn line_text(&self, line_index: usize) -> Option<String> {
        let line = self.lines.get(line_index)?;
        let mut out = String::new();

        // A continuation line starts with the fragment of a multi-line token
        // that began on an earlier line; prepend it before any local tokens.
        if let Some(piece) = self.carried_piece_for_line(line_index) {
            out.push_str(piece);
        }
        // A line with no local tokens is a pure continuation line: the
        // carried fragment (if any) is its entire visible text.
        let Some(range) = line.local_range() else {
            return Some(out);
        };
        for buffer_index in range.clone() {
            let token_piece = self.buffer[buffer_index];
            if token_piece.token.is_newline() {
                // Must be last token
                debug_assert_eq!(buffer_index + 1, range.end);
                break;
            }
            // A multi-line token that actually spans lines contributes only
            // its first stored segment to the line it starts on; the rest is
            // carried onto later lines.
            if token_piece.token.can_span_lines()
                && let Some(first_segment) = self
                    .multi_line_map
                    .get(&buffer_index)
                    .and_then(|segments| segments.first())
            {
                out.push_str(&token_piece.lexeme[first_segment.clone()]);
                // Must be last token
                debug_assert_eq!(buffer_index + 1, range.end);
                break;
            }
            // Ordinary single-line token: its whole lexeme is visible here.
            out.push_str(token_piece.lexeme);
        }
        Some(out)
    }
|
||||||
|
|
||||||
|
/// Returns the 0-based physical line on which the token at `position`
|
||||||
|
/// starts.
|
||||||
|
///
|
||||||
|
/// For multi-line tokens, this is the line where the token begins, not
|
||||||
|
/// every physical line it spans.
|
||||||
|
///
|
||||||
|
/// Returns `None` if `position` is out of bounds.
|
||||||
|
#[must_use]
|
||||||
|
pub fn token_line(&self, position: TokenPosition) -> Option<usize> {
|
||||||
|
// Reject invalid token positions early.
|
||||||
|
self.buffer.get(position.0)?;
|
||||||
|
|
||||||
|
let line_index = self
|
||||||
|
.lines
|
||||||
|
.partition_point(|line| self.line_search_upper_bound(line) <= position.0);
|
||||||
|
|
||||||
|
(line_index < self.lines.len()).then_some(line_index)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the exclusive upper token index bound for binary-searching
|
||||||
|
/// lines by token position.
|
||||||
|
///
|
||||||
|
/// In other words: every token that "belongs" to this line in start-line
|
||||||
|
/// terms has index `< returned_value`.
|
||||||
|
fn line_search_upper_bound(&self, line: &Line) -> usize {
|
||||||
|
if let Some(local_range) = line.local_range() {
|
||||||
|
local_range.end
|
||||||
|
} else {
|
||||||
|
// Pure continuation line: it contains only the carried fragment of
|
||||||
|
// a multi-line token that started earlier.
|
||||||
|
//
|
||||||
|
// That token is always the last local token on the origin line, so
|
||||||
|
// its token index + 1 acts as the exclusive upper bound.
|
||||||
|
let origin_line = line
|
||||||
|
.continued_from
|
||||||
|
.expect("empty line entry must be a continuation line");
|
||||||
|
self.carried_token_index(origin_line)
|
||||||
|
.expect("continuation line must point to a valid origin token")
|
||||||
|
+ 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If `line_index` begins with a fragment of a multi-line token that
|
||||||
|
/// started earlier, returns the visible slice of that token for this line.
|
||||||
|
fn carried_piece_for_line(&self, line_index: usize) -> Option<&'src str> {
|
||||||
|
// Find carried, multiline token
|
||||||
|
let origin_line = self.lines.get(line_index)?.continued_from?;
|
||||||
|
let carried_token_index = self.carried_token_index(origin_line)?;
|
||||||
|
// Find right part of the multiline token's lexeme
|
||||||
|
let segments = self.multi_line_map.get(&carried_token_index)?;
|
||||||
|
let segment_index = line_index.checked_sub(origin_line)?;
|
||||||
|
let boundary = segments.get(segment_index)?;
|
||||||
|
self.buffer
|
||||||
|
.get(carried_token_index)?
|
||||||
|
.lexeme
|
||||||
|
.get(boundary.clone())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recovers the token index of the multi-line token that started on
|
||||||
|
/// `origin_line` and is carried into later lines.
|
||||||
|
///
|
||||||
|
/// In the current representation, this is always the last local token that
|
||||||
|
/// started on the origin line.
|
||||||
|
fn carried_token_index(&self, origin_line: usize) -> Option<usize> {
|
||||||
|
let range = self.lines.get(origin_line)?.local_range()?;
|
||||||
|
let token_index = range.end.checked_sub(1)?;
|
||||||
|
|
||||||
|
debug_assert!(self.buffer[token_index].token.can_span_lines());
|
||||||
|
Some(token_index)
|
||||||
|
}
|
||||||
|
|
||||||
|
    /// Returns the visible per-line spans occupied by the token at `position`.
    ///
    /// Coordinates are expressed in visible character columns inside
    /// `line_text(line)`, with an exclusive end bound.
    ///
    /// Newline-only tokens have no visible text, so they return an empty vector.
    ///
    /// Returns `None` if `position` is invalid.
    #[must_use]
    pub fn token_visible_spans(&self, position: TokenPosition) -> Option<Vec<VisibleLineSpan>> {
        let token_piece = self.buffer.get(position.0).copied()?;
        let start_line = self.token_line(position)?;
        // Column of the token's first visible character on its start line.
        let start_column = self.token_start_visible_column(position)?;

        if token_piece.token.is_newline() {
            return Some(Vec::new());
        }

        // True multi-line token: reuse already computed visible byte segments,
        // then convert them into visible character columns.
        if let Some(segments) = self.multi_line_map.get(&position.0) {
            let mut out = Vec::with_capacity(segments.len());

            for (segment_index, byte_range) in segments.iter().enumerate() {
                let visible_text = &token_piece.lexeme[byte_range.clone()];
                // Width is measured in characters, not bytes, to match the
                // column convention of `line_text`.
                let width = visible_text.chars().count();

                // Empty visible fragment: skip it.
                // This matters for things like a token ending with '\n'.
                if width == 0 {
                    continue;
                }

                // Segment k of the token lands on the k-th line after its
                // start line.
                let line = start_line + segment_index;

                // A trailing newline does not create an extra stored physical line.
                if line >= self.line_count() {
                    break;
                }

                // Only the first segment is offset by the tokens preceding it;
                // continuation segments start at column 0.
                let column_start = if segment_index == 0 { start_column } else { 0 };
                out.push(VisibleLineSpan {
                    line,
                    columns: column_start..(column_start + width),
                });
            }

            return Some(out);
        }

        // Single-line token, including "can_span_lines" tokens that happen not
        // to contain a line break.
        let width = token_piece.lexeme.chars().count();
        Some(vec![VisibleLineSpan {
            line: start_line,
            columns: start_column..(start_column + width),
        }])
    }
|
||||||
|
|
||||||
|
    /// Returns the visible start column of the token at `position` inside
    /// `line_text(token_line(position))`.
    ///
    /// Column is measured in visible characters, excluding line terminators.
    fn token_start_visible_column(&self, position: TokenPosition) -> Option<usize> {
        let line_index = self.token_line(position)?;
        let line = self.lines.get(line_index)?;

        // A carried multi-line fragment occupies the leading columns of a
        // continuation line, so it offsets every local token after it.
        let mut column = self
            .carried_piece_for_line(line_index)
            .map_or(0, |text| text.chars().count());

        // Walk the line's local tokens left to right, accumulating widths
        // until we reach `position`.
        let local_range = line.local_range()?;
        for buffer_index in local_range {
            if buffer_index == position.0 {
                return Some(column);
            }

            let token_piece = self.buffer.get(buffer_index)?;

            if token_piece.token.is_newline() {
                break;
            }

            // A multi-line token before `position` on the same line means the
            // target cannot start here; give up on this line.
            if token_piece.token.can_span_lines() && self.multi_line_map.contains_key(&buffer_index)
            {
                // NOTE(review): this assert presumably held at some point but
                // is disabled; `local_range` is also moved into the loop above,
                // so re-enabling it would need a saved `local_range.end` — confirm
                // the invariant before restoring it.
                //debug_assert_eq!(buffer_index + 1, local_range.end);
                return None;
            }

            column += token_piece.lexeme.chars().count();
        }

        None
    }
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
|
pub fn span_visible_on_line(&self, span: crate::ast::AstSpan) -> Option<VisibleLineSpan> {
|
||||||
|
let start = self
|
||||||
|
.token_visible_spans(span.token_from)?
|
||||||
|
.into_iter()
|
||||||
|
.next()?;
|
||||||
|
let end = self
|
||||||
|
.token_visible_spans(span.token_to)?
|
||||||
|
.into_iter()
|
||||||
|
.last()?;
|
||||||
|
|
||||||
|
if start.line != end.line {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(VisibleLineSpan {
|
||||||
|
line: start.line,
|
||||||
|
columns: start.columns.start..end.columns.end,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
632
rottlib/src/lexer/raw_lexer.rs
Normal file
632
rottlib/src/lexer/raw_lexer.rs
Normal file
@ -0,0 +1,632 @@
|
|||||||
|
//! Lexer for `UnrealScript` that understands inline `cpptext { ... }` blocks.
//!
//! ## Notable details
//!
//! In `UnrealScript`, `cpptext` lets authors embed raw C++ between braces.\
//! Because whitespace, newlines, or comments may appear between the
//! `cpptext` keyword and the opening `{`, the lexer must remember that
//! it has just seen `cpptext` - hence a state machine.
|
||||||
|
//!
|
||||||
|
//! ## Modes
|
||||||
|
//!
|
||||||
|
//! - **Normal** - ordinary `UnrealScript` `RawTokens`.
|
||||||
|
//! - **`AwaitingCppBlock`** - after `cpptext`, waiting for the next `{`.
|
||||||
|
//!
|
||||||
|
//! When that brace arrives, the lexer consumes the entire C++ block as
|
||||||
|
//! one `RawToken` (`RawToken::Brace(BraceKind::CppBlock)`), tracking nested
|
||||||
|
//! braces, strings, and comments on the way. If the closing `}` is
|
||||||
|
//! missing, everything to EOF is treated as C++; downstream parsers must
|
||||||
|
//! handle that gracefully.
|
||||||
|
|
||||||
|
use logos::Lexer;
|
||||||
|
|
||||||
|
/// Which lexer mode we're in. See the module docs for the full story.
///
/// Stored in [`LexerState`] and consulted by the `{` callback to decide
/// whether the brace opens an embedded C++ block.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Default)]
enum LexerMode {
    /// Lexing regular `UnrealScript`.
    #[default]
    Normal,
    /// Saw `cpptext`; waiting for the opening `{` of a C++ block.
    AwaitingCppBlock,
}
|
||||||
|
|
||||||
|
/// Extra per-lexer state. Currently just holds the [`LexerMode`].
///
/// This is a logos-specific implementation detail: it is attached to the
/// lexer via `#[logos(extras = LexerState)]` and mutated by token callbacks.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
pub struct LexerState {
    // Current mode; flipped by the `cpptext`/`cppstruct` and `{` callbacks.
    mode: LexerMode,
}
|
||||||
|
|
||||||
|
/// Distinguishes an ordinary `{` token from one that starts
/// an embedded C++ block.
///
/// Carried as the payload of [`RawToken::Brace`].
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum BraceKind {
    /// An ordinary `UnrealScript` `{`.
    Normal,
    /// A `{` that starts an embedded C++ block and consumes through its
    /// matching `}`.
    CppBlock,
}
|
||||||
|
|
||||||
|
/// Tokens produced by the `UnrealScript` lexer.
///
/// Includes both syntactic tokens and trivia such as whitespace, newlines,
/// and comments, so the token stream is lossless with respect to the input.
#[derive(logos::Logos, Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[logos(extras = LexerState)]
pub enum RawToken {
    // # Compiler/directive keywords
    #[regex(r"(?i)#exec[^\r\n]*(?:\r\n|\n|\r)?")]
    ExecDirective,
    // The callback peeks past trivia: the lexer only arms `AwaitingCppBlock`
    // when an opening `{` actually follows the keyword.
    #[regex("(?i)cpptext", |lex| {
        if is_next_nontrivia_left_brace(lex) {
            lex.extras.mode = LexerMode::AwaitingCppBlock;
        } else {
            lex.extras.mode = LexerMode::Normal;
        }
    })]
    CppText,

    #[regex("(?i)cppstruct", |lex| {
        if is_next_nontrivia_left_brace(lex) {
            lex.extras.mode = LexerMode::AwaitingCppBlock;
        } else {
            lex.extras.mode = LexerMode::Normal;
        }
    })]
    CppStruct,
    // # Declaration & structural keywords
    // NOTE(review): earlier regex form kept for reference; remove once the
    // `ignore(case)` token below is confirmed equivalent in priority.
    //#[regex("(?i)class")]
    #[token("class", ignore(case))]
    Class,
    #[token("struct", ignore(case))]
    Struct,
    #[token("enum", ignore(case))]
    Enum,
    #[token("state", ignore(case))]
    State,
    #[token("auto", ignore(case))]
    Auto,
    #[token("function", ignore(case))]
    Function,
    #[token("event", ignore(case))]
    Event,
    #[token("delegate", ignore(case))]
    Delegate,
    #[token("var", ignore(case))]
    Var,
    #[token("local", ignore(case))]
    Local,

    // # Inheritance, interface, dependencies
    #[token("extends", ignore(case))]
    Extends,
    #[token("dependson", ignore(case))]
    DependsOn,

    // # Access modifiers & properties
    #[token("private", ignore(case))]
    Private,
    #[token("protected", ignore(case))]
    Protected,
    #[token("public", ignore(case))]
    Public,
    #[token("const", ignore(case))]
    Const,
    #[token("static", ignore(case))]
    Static,
    #[token("native", ignore(case))]
    Native,
    #[token("abstract", ignore(case))]
    Abstract,
    #[token("deprecated", ignore(case))]
    Deprecated,
    #[token("safereplace", ignore(case))]
    SafeReplace,
    #[token("exportstructs", ignore(case))]
    ExportStructs,
    #[token("input", ignore(case))]
    Input,

    // # UnrealScript metadata/specifiers
    #[token("final", ignore(case))]
    Final,
    #[token("default", ignore(case))]
    Default,
    #[token("defaultproperties", ignore(case))]
    DefaultProperties,
    #[token("object", ignore(case))]
    Object,
    #[token("begin", ignore(case))]
    Begin,
    #[token("end", ignore(case))]
    End,
    #[token("optional", ignore(case))]
    Optional,
    #[token("config", ignore(case))]
    Config,
    #[token("perobjectconfig", ignore(case))]
    PerObjectConfig,
    #[token("globalconfig", ignore(case))]
    GlobalConfig,
    #[token("collapsecategories", ignore(case))]
    CollapseCategories,
    #[token("dontcollapsecategories", ignore(case))]
    DontCollapseCategories,
    #[token("hidecategories", ignore(case))]
    HideCategories,
    #[token("showcategories", ignore(case))]
    ShowCategories,
    #[token("localized", ignore(case))]
    Localized,
    #[token("placeable", ignore(case))]
    Placeable,
    #[token("notplaceable", ignore(case))]
    NotPlaceable,
    #[token("instanced", ignore(case))]
    Instanced,
    #[token("editconst", ignore(case))]
    EditConst,
    #[token("editconstarray", ignore(case))]
    EditConstArray,
    #[token("editinline", ignore(case))]
    EditInline,
    #[token("editinlineuse", ignore(case))]
    EditInlineUse,
    #[token("editinlinenew", ignore(case))]
    EditInlineNew,
    #[token("noteditinlinenew", ignore(case))]
    NotEditInlineNew,
    #[token("edfindable", ignore(case))]
    EdFindable,
    #[token("editinlinenotify", ignore(case))]
    EditInlineNotify,
    #[token("parseconfig", ignore(case))]
    ParseConfig,
    #[token("automated", ignore(case))]
    Automated,
    #[token("dynamicrecompile", ignore(case))]
    DynamicRecompile,
    #[token("transient", ignore(case))]
    Transient,
    #[token("long", ignore(case))]
    Long,
    #[token("operator", ignore(case))]
    Operator,
    #[token("preoperator", ignore(case))]
    PreOperator,
    #[token("postoperator", ignore(case))]
    PostOperator,
    #[token("simulated", ignore(case))]
    Simulated,
    #[token("exec", ignore(case))]
    Exec,
    #[token("latent", ignore(case))]
    Latent,
    #[token("iterator", ignore(case))]
    Iterator,
    #[token("out", ignore(case))]
    Out,
    #[token("skip", ignore(case))]
    Skip,
    #[token("singular", ignore(case))]
    Singular,
    #[token("coerce", ignore(case))]
    Coerce,
    #[token("assert", ignore(case))]
    Assert,
    #[token("ignores", ignore(case))]
    Ignores,
    #[token("within", ignore(case))]
    Within,
    #[token("init", ignore(case))]
    Init,
    #[token("export", ignore(case))]
    Export,
    #[token("noexport", ignore(case))]
    NoExport,
    #[token("hidedropdown", ignore(case))]
    HideDropdown,
    #[token("travel", ignore(case))]
    Travel,
    #[token("cache", ignore(case))]
    Cache,
    #[token("cacheexempt", ignore(case))]
    CacheExempt,

    // # Replication-related
    #[token("reliable", ignore(case))]
    Reliable,
    #[token("unreliable", ignore(case))]
    Unreliable,
    #[token("replication", ignore(case))]
    Replication,
    #[token("nativereplication", ignore(case))]
    NativeReplication,

    // # Control-flow keywords
    #[token("goto", ignore(case))]
    Goto,
    #[token("if", ignore(case))]
    If,
    #[token("else", ignore(case))]
    Else,
    #[token("switch", ignore(case))]
    Switch,
    #[token("case", ignore(case))]
    Case,
    #[token("for", ignore(case))]
    For,
    #[token("foreach", ignore(case))]
    ForEach,
    #[token("while", ignore(case))]
    While,
    #[token("do", ignore(case))]
    Do,
    #[token("until", ignore(case))]
    Until,
    #[token("break", ignore(case))]
    Break,
    #[token("continue", ignore(case))]
    Continue,
    #[token("return", ignore(case))]
    Return,

    // # Built-in types
    #[token("int", ignore(case))]
    Int,
    #[token("float", ignore(case))]
    Float,
    #[token("bool", ignore(case))]
    Bool,
    #[token("byte", ignore(case))]
    Byte,
    #[token("string", ignore(case))]
    String,
    #[token("array", ignore(case))]
    Array,
    #[token("name", ignore(case))]
    Name,

    // FloatLiteral must come before IntegerLiteral and '.'
    // to have higher priority.
    // It also recognizes things like: `1.foo`, `1.foo.bar`, `1.2.3`.
    // It has to. Because UnrealScript is a pile of-... wonderful language,
    // where everything is possible.
    #[regex(r"[0-9]+(?:\.(?:[0-9]+|[A-Za-z_][A-Za-z0-9_]*))+[fF]?")]
    #[regex(r"(?:[0-9]*\.[0-9]+|[0-9]+\.[0-9]*)(?:[eE][+-]?[0-9]+)?[fF]?")]
    #[regex(r"[0-9]+[eE][+-]?[0-9]+[fF]?")]
    FloatLiteral,

    // Binary, octal, hexadecimal (with optional `_` digit separators), then
    // plain decimal.
    #[regex(r"0b[01](?:_?[01])*")]
    #[regex(r"0o[0-7](?:_?[0-7])*")]
    #[regex(r"0x[0-9A-Fa-f](?:_?[0-9A-Fa-f])*")]
    #[regex(r"[0-9][0-9]*")]
    IntegerLiteral,

    // Double-quoted string; escapes allowed, no raw line breaks inside.
    #[regex(r#""([^"\\\r\n]|\\.)*""#)]
    StringLiteral,
    // Single-quoted UnrealScript name literal.
    #[regex(r"'[a-zA-Z0-9_\. \-]*'")]
    NameLiteral,
    #[token("true", ignore(case))]
    True,
    #[token("false", ignore(case))]
    False,
    #[token("none", ignore(case))]
    None,
    #[token("self", ignore(case))]
    SelfValue,
    #[token("new", ignore(case))]
    New,
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*")]
    Identifier,

    // # Operations
    // ## Exponentiation
    #[token("**")]
    Exponentiation,
    // ## Unary
    #[token("++")]
    Increment,
    #[token("--")]
    Decrement,
    #[token("!")]
    Not,
    #[token("~")]
    BitwiseNot,
    // ## Vector
    #[token("dot", ignore(case))]
    Dot,
    #[token("cross", ignore(case))]
    Cross,
    // ## Multiplicative
    #[token("*")]
    Multiply,
    #[token("/")]
    Divide,
    #[token("%")]
    Modulo,
    // ## Additive
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    // ## String manipulation
    #[token("@")]
    ConcatSpace,
    #[token("$")]
    Concat,
    // ## Shifts
    #[token("<<")]
    LeftShift,
    #[token(">>>")]
    LogicalRightShift,
    #[token(">>")]
    RightShift,
    // ## Relational
    #[token("<")]
    Less,
    #[token("<=")]
    LessEqual,
    #[token(">")]
    Greater,
    #[token(">=")]
    GreaterEqual,
    #[token("==")]
    Equal,
    #[token("!=")]
    NotEqual,
    #[token("~=")]
    ApproximatelyEqual,
    #[token("clockwisefrom", ignore(case))]
    ClockwiseFrom,
    // ## Bitwise
    #[token("&")]
    BitwiseAnd,
    #[token("|")]
    BitwiseOr,
    #[token("^")]
    BitwiseXor,
    // ## Logical
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("||")]
    LogicalOr,
    // ## Assignments
    #[token("=")]
    Assign,
    #[token("*=")]
    MultiplyAssign,
    #[token("/=")]
    DivideAssign,
    #[token("%=")]
    ModuloAssign,
    #[token("+=")]
    PlusAssign,
    #[token("-=")]
    MinusAssign,
    #[token("$=")]
    ConcatAssign,
    #[token("@=")]
    ConcatSpaceAssign,

    // # Punctuation & delimiters
    #[token("(")]
    LeftParenthesis,
    #[token(")")]
    RightParenthesis,
    // `{` is routed through a callback: in `AwaitingCppBlock` mode it eats
    // the whole embedded C++ block as a single token.
    #[token("{", process_left_brace)]
    Brace(BraceKind),
    #[token("}")]
    RightBrace,
    #[token("[")]
    LeftBracket,
    #[token("]")]
    RightBracket,
    #[token(";")]
    Semicolon,
    #[token(",")]
    Comma,
    #[token(".")]
    Period,
    #[token(":")]
    Colon,
    #[token("#")]
    Hash,
    #[token("?")]
    Question,

    // # Comments & whitespaces
    #[regex(r"//[^\r\n]*")]
    LineComment,
    // Callback consumes the whole (possibly nested) comment body.
    #[regex(r"/\*", handle_block_comment)]
    BlockComment,
    #[regex(r"\r\n|\n|\r")]
    Newline,
    #[regex(r"[ \t]+")]
    Whitespace,

    // # Technical
    Error,
}
|
||||||
|
|
||||||
|
/// Consumes an `UnrealScript` `/* ... */` block comment, including nested comments.
|
||||||
|
///
|
||||||
|
/// Matches the entire comment, including its delimiters.
|
||||||
|
/// If the comment is unterminated, consumes to the end of input.
|
||||||
|
fn handle_block_comment(lexer: &mut Lexer<RawToken>) {
|
||||||
|
let mut comment_depth = 1;
|
||||||
|
while let Some(next_character) = lexer.remainder().chars().next() {
|
||||||
|
if lexer.remainder().starts_with("/*") {
|
||||||
|
comment_depth += 1;
|
||||||
|
lexer.bump(2);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if lexer.remainder().starts_with("*/") {
|
||||||
|
comment_depth -= 1;
|
||||||
|
lexer.bump(2);
|
||||||
|
if comment_depth == 0 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
lexer.bump(next_character.len_utf8());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Processes `{` according to the current lexer mode.
|
||||||
|
///
|
||||||
|
/// Returns [`BraceKind::Normal`] for ordinary `UnrealScript` braces.
|
||||||
|
/// After `cpptext` or `cppstruct`, consumes the embedded C++ block and returns
|
||||||
|
/// [`BraceKind::CppBlock`].
|
||||||
|
fn process_left_brace(lexer: &mut Lexer<RawToken>) -> BraceKind {
|
||||||
|
match lexer.extras.mode {
|
||||||
|
LexerMode::Normal => BraceKind::Normal,
|
||||||
|
LexerMode::AwaitingCppBlock => {
|
||||||
|
lexer.extras.mode = LexerMode::Normal;
|
||||||
|
consume_cpp_block(lexer);
|
||||||
|
BraceKind::CppBlock
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes a complete C++ block, handling:
/// - Nested `{...}` pairs
/// - String literals (`"..."` and `'...'`), including escaped quotes
/// - Line comments (`// ...\n`)
/// - Block comments (`/* ... */`)
///
/// Leaves the lexer positioned immediately after the closing `}` of the block.
/// The opening `{` must have already been consumed by the caller.
///
/// We target UE2-era cpp blocks, so no need for anything fancy.
fn consume_cpp_block(lexer: &mut Lexer<RawToken>) {
    // Depth 1 = the already-consumed opening `{` of the block itself.
    let mut brace_depth = 1;
    while let Some(next_character) = lexer.remainder().chars().next() {
        match next_character {
            '{' => {
                brace_depth += 1;
                lexer.bump(1);
            }
            '}' => {
                brace_depth -= 1;
                lexer.bump(1);
                if brace_depth == 0 {
                    break;
                }
            }
            // Check `/*` before `//`: both begin with `/` but are distinct.
            '/' if lexer.remainder().starts_with("/*") => {
                lexer.bump(2); // consuming two-byte sequence `/*`
                consume_c_style_block_comment(lexer);
            }
            '/' if lexer.remainder().starts_with("//") => {
                lexer.bump(2); // consuming two-byte sequence `//`
                // Eat through the end of the line (terminator included).
                while let Some(next_character) = lexer.remainder().chars().next() {
                    lexer.bump(next_character.len_utf8());
                    if next_character == '\n' || next_character == '\r' {
                        break;
                    }
                }
            }
            // Braces inside string/char literals must not affect the depth.
            '"' | '\'' => {
                lexer.bump(1); // skip `'` or `"`
                consume_quoted_cpp_literal(lexer, next_character);
            }
            _ => lexer.bump(next_character.len_utf8()),
        }
    }
}
|
||||||
|
|
||||||
|
/// Consumes a non-nesting C-style `/* ... */` comment.
|
||||||
|
///
|
||||||
|
/// Assumes that the opening `/*` has already been consumed.
|
||||||
|
fn consume_c_style_block_comment(lexer: &mut Lexer<RawToken>) {
|
||||||
|
while let Some(next_character) = lexer.remainder().chars().next() {
|
||||||
|
if lexer.remainder().starts_with("*/") {
|
||||||
|
lexer.bump(2);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
lexer.bump(next_character.len_utf8());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes a quoted C++ string or character literal.
|
||||||
|
///
|
||||||
|
/// Assumes that the opening delimiter has already been consumed.
|
||||||
|
fn consume_quoted_cpp_literal(lexer: &mut Lexer<RawToken>, delimiter: char) {
|
||||||
|
while let Some(next_character) = lexer.remainder().chars().next() {
|
||||||
|
lexer.bump(next_character.len_utf8());
|
||||||
|
if next_character == '\\' {
|
||||||
|
// Skip the escaped character
|
||||||
|
if let Some(escaped_character) = lexer.remainder().chars().next() {
|
||||||
|
lexer.bump(escaped_character.len_utf8());
|
||||||
|
}
|
||||||
|
} else if next_character == delimiter {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Peek ahead from the current lexer position, skipping "trivia", and report
/// whether the next significant character is `{`.
///
/// Trivia here means:
/// - Spaces and tabs
/// - Newlines (`\r`, `\n`, or `\r\n`)
/// - Line comments (`// ...`)
/// - Block comments (`/* ... */`), including nested ones
///
/// This is used after lexing tokens like `cpptext` or `cppstruct`, where
/// `UnrealScript` allows arbitrary trivia between the keyword and the opening
/// brace of the embedded C++ block.
///
/// Returns `true` if the next non-trivia character is `{`, otherwise `false`.
/// If the input ends while skipping trivia, returns `false`.
///
/// Pure lookahead: works on a borrowed slice and never advances the lexer.
fn is_next_nontrivia_left_brace(lexer: &Lexer<RawToken>) -> bool {
    let mut remaining = lexer.remainder();

    while let Some(next_character) = remaining.chars().next() {
        match next_character {
            // Whitespace and line terminators: skip one character.
            ' ' | '\t' | '\r' | '\n' => {
                remaining = &remaining[next_character.len_utf8()..];
            }
            // Line comment: skip through the end of the line.
            '/' if remaining.starts_with("//") => {
                remaining = &remaining[2..];
                while let Some(comment_character) = remaining.chars().next() {
                    remaining = &remaining[comment_character.len_utf8()..];
                    if comment_character == '\n' || comment_character == '\r' {
                        break;
                    }
                }
            }
            // Block comment: skip a balanced, possibly nested `/* ... */`.
            '/' if remaining.starts_with("/*") => {
                remaining = &remaining[2..];
                let mut comment_depth = 1;
                while comment_depth > 0 {
                    if remaining.starts_with("/*") {
                        comment_depth += 1;
                        remaining = &remaining[2..];
                        continue;
                    }
                    if remaining.starts_with("*/") {
                        comment_depth -= 1;
                        remaining = &remaining[2..];
                        continue;
                    }
                    // Input ended inside an unterminated comment.
                    let Some(comment_character) = remaining.chars().next() else {
                        return false;
                    };
                    remaining = &remaining[comment_character.len_utf8()..];
                }
            }
            // First non-trivia character decides the answer.
            _ => return next_character == '{',
        }
    }

    false
}
|
||||||
338
rottlib/src/lexer/tests.rs
Normal file
338
rottlib/src/lexer/tests.rs
Normal file
@ -0,0 +1,338 @@
|
|||||||
|
use super::{Keyword, Token, TokenPosition, TokenizedFile, split_visible_line_segments};
|
||||||
|
|
||||||
|
fn reconstruct_source(file: &TokenizedFile<'_>) -> String {
|
||||||
|
file.buffer.iter().map(|piece| piece.lexeme).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flattens the token buffer into `(kind, lexeme)` pairs for easy comparison.
fn token_kinds_and_lexemes<'src>(file: &TokenizedFile<'src>) -> Vec<(Token, &'src str)> {
    let mut pairs = Vec::with_capacity(file.buffer.len());
    for piece in &file.buffer {
        pairs.push((piece.token, piece.lexeme));
    }
    pairs
}
|
||||||
|
|
||||||
|
#[test]
fn split_visible_line_segments_returns_empty_for_single_line_text() {
    // Text without any line terminator produces no segments at all.
    let single_line = split_visible_line_segments("abcdef");
    assert!(single_line.is_empty());

    // The empty string is likewise segment-free.
    let empty_input = split_visible_line_segments("");
    assert!(empty_input.is_empty());
}
|
||||||
|
|
||||||
|
#[test]
fn split_visible_line_segments_handles_mixed_line_endings() {
    // CRLF, bare CR, and bare LF all terminate a visible segment.
    let text = "ab\r\ncd\ref\n";
    let segments = split_visible_line_segments(text);

    let expected_ranges = vec![0..2, 4..6, 7..9, 10..10];
    assert_eq!(segments, expected_ranges);

    // Byte ranges map back onto the visible text of each line.
    let visible_parts: Vec<&str> = segments
        .iter()
        .map(|range| &text[range.clone()])
        .collect();
    assert_eq!(visible_parts, vec!["ab", "cd", "ef", ""]);
}
|
||||||
|
|
||||||
|
#[test]
fn tokenization_is_lossless_for_mixed_input() {
    // Mix of CRLF/LF/CR endings, comments, and literals.
    let source = concat!(
        "class Foo extends Bar;\r\n",
        "var string S;\n",
        "/* block comment */\r",
        "defaultproperties {}\n",
        "X = 1.25e+2;\n",
    );

    let tokenized = TokenizedFile::tokenize(source);

    // Concatenating every lexeme must reproduce the input byte-for-byte.
    let round_trip = reconstruct_source(&tokenized);
    assert_eq!(round_trip, source);
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn trailing_newline_does_not_create_extra_empty_line() {
|
||||||
|
let source = "a\n";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(file.lines.len(), 1);
|
||||||
|
assert_eq!(file.lines[0].continued_from, None);
|
||||||
|
assert_eq!(file.lines[0].local_range(), Some(0..2));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
token_kinds_and_lexemes(&file),
|
||||||
|
vec![(Token::Identifier, "a"), (Token::Newline, "\n")]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn final_line_without_trailing_newline_is_committed() {
|
||||||
|
let source = "a\nb";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(file.lines.len(), 2);
|
||||||
|
|
||||||
|
assert_eq!(file.lines[0].continued_from, None);
|
||||||
|
assert_eq!(file.lines[0].local_range(), Some(0..2));
|
||||||
|
|
||||||
|
assert_eq!(file.lines[1].continued_from, None);
|
||||||
|
assert_eq!(file.lines[1].local_range(), Some(2..3));
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
token_kinds_and_lexemes(&file),
|
||||||
|
vec![
|
||||||
|
(Token::Identifier, "a"),
|
||||||
|
(Token::Newline, "\n"),
|
||||||
|
(Token::Identifier, "b"),
|
||||||
|
]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn multiline_block_comment_creates_continuation_line_with_local_tokens() {
|
||||||
|
let source = "a/*x\ny*/b";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
token_kinds_and_lexemes(&file),
|
||||||
|
vec![
|
||||||
|
(Token::Identifier, "a"),
|
||||||
|
(Token::BlockComment, "/*x\ny*/"),
|
||||||
|
(Token::Identifier, "b"),
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(file.lines.len(), 2);
|
||||||
|
|
||||||
|
assert_eq!(file.lines[0].continued_from, None);
|
||||||
|
assert_eq!(file.lines[0].local_range(), Some(0..2));
|
||||||
|
|
||||||
|
assert_eq!(file.lines[1].continued_from, Some(0));
|
||||||
|
assert_eq!(file.lines[1].local_range(), Some(2..3));
|
||||||
|
|
||||||
|
let block_comment_index = 1;
|
||||||
|
assert_eq!(
|
||||||
|
file.multi_line_map.get(&block_comment_index),
|
||||||
|
Some(&vec![0..3, 4..7])
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn pure_multiline_token_finishes_with_bare_continuation_line() {
|
||||||
|
let source = "/*a\nb*/";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
token_kinds_and_lexemes(&file),
|
||||||
|
vec![(Token::BlockComment, "/*a\nb*/")]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(file.lines.len(), 2);
|
||||||
|
|
||||||
|
assert_eq!(file.lines[0].continued_from, None);
|
||||||
|
assert_eq!(file.lines[0].local_range(), Some(0..1));
|
||||||
|
|
||||||
|
assert_eq!(file.lines[1].continued_from, Some(0));
|
||||||
|
assert_eq!(file.lines[1].local_range(), None);
|
||||||
|
|
||||||
|
assert_eq!(file.multi_line_map.get(&0), Some(&vec![0..3, 4..7]));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn nested_block_comments_are_consumed_as_one_token() {
|
||||||
|
let source = "/* outer /* inner */ still outer */";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert!(!file.has_errors());
|
||||||
|
assert_eq!(file.buffer.len(), 1);
|
||||||
|
assert_eq!(file.buffer[0].token, Token::BlockComment);
|
||||||
|
assert_eq!(file.buffer[0].lexeme, source);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cpptext_with_trivia_before_brace_produces_cpp_block_token() {
|
||||||
|
let source = "cpptext /* gap */\n{ int x; if (y) { z(); } }";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
token_kinds_and_lexemes(&file),
|
||||||
|
vec![
|
||||||
|
(Token::Keyword(Keyword::CppText), "cpptext"),
|
||||||
|
(Token::Whitespace, " "),
|
||||||
|
(Token::BlockComment, "/* gap */"),
|
||||||
|
(Token::Newline, "\n"),
|
||||||
|
(Token::CppBlock, "{ int x; if (y) { z(); } }"),
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(file.lines.len(), 2);
|
||||||
|
|
||||||
|
assert_eq!(file.lines[0].continued_from, None);
|
||||||
|
assert_eq!(file.lines[0].local_range(), Some(0..4));
|
||||||
|
|
||||||
|
assert_eq!(file.lines[1].continued_from, None);
|
||||||
|
assert_eq!(file.lines[1].local_range(), Some(4..5));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn cpptext_without_following_brace_does_not_start_cpp_block_mode() {
|
||||||
|
let source = "cpptext Foo { bar }";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
let tokens = token_kinds_and_lexemes(&file);
|
||||||
|
|
||||||
|
assert!(!tokens.iter().any(|(token, _)| *token == Token::CppBlock));
|
||||||
|
assert!(
|
||||||
|
tokens
|
||||||
|
.iter()
|
||||||
|
.any(|(token, lexeme)| *token == Token::Keyword(Keyword::CppText)
|
||||||
|
&& *lexeme == "cpptext")
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
tokens
|
||||||
|
.iter()
|
||||||
|
.any(|(token, lexeme)| *token == Token::LeftBrace && *lexeme == "{")
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
tokens
|
||||||
|
.iter()
|
||||||
|
.any(|(token, lexeme)| *token == Token::RightBrace && *lexeme == "}")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn utf16_length_is_precomputed_per_token() {
|
||||||
|
let source = "\"😀\"";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(file.buffer.len(), 1);
|
||||||
|
assert_eq!(file.buffer[0].token, Token::StringLiteral);
|
||||||
|
assert_eq!(file.buffer[0].utf16_length, source.encode_utf16().count());
|
||||||
|
assert_eq!(file.buffer[0].utf16_length, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn lexer_reports_error_tokens() {
|
||||||
|
let source = "`";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert!(file.has_errors());
|
||||||
|
assert_eq!(reconstruct_source(&file), source);
|
||||||
|
assert_eq!(file.buffer.len(), 1);
|
||||||
|
assert_eq!(file.buffer[0].token, Token::Error);
|
||||||
|
assert_eq!(file.buffer[0].lexeme, "`");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn token_predicates_match_current_rules() {
|
||||||
|
assert!(Token::Identifier.is_valid_identifier_name());
|
||||||
|
assert!(Token::Keyword(Keyword::Int).is_valid_identifier_name());
|
||||||
|
assert!(Token::Keyword(Keyword::Int).is_valid_type_name());
|
||||||
|
assert!(Token::Keyword(Keyword::Delegate).is_valid_type_name());
|
||||||
|
|
||||||
|
assert!(Token::Keyword(Keyword::Exec).is_valid_function_modifier());
|
||||||
|
assert!(Token::Keyword(Keyword::Operator).is_valid_function_modifier());
|
||||||
|
assert!(Token::Keyword(Keyword::Config).is_valid_function_modifier());
|
||||||
|
|
||||||
|
assert!(!Token::Plus.is_valid_identifier_name());
|
||||||
|
assert!(!Token::Plus.is_valid_type_name());
|
||||||
|
assert!(!Token::Keyword(Keyword::If).is_valid_function_modifier());
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn tokens_iterator_yields_positions_in_buffer_order() {
|
||||||
|
let source = "a + b";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
let collected: Vec<_> = file.iter().collect();
|
||||||
|
|
||||||
|
assert_eq!(collected.len(), file.buffer.len());
|
||||||
|
|
||||||
|
for (expected_index, (position, token_data)) in collected.into_iter().enumerate() {
|
||||||
|
assert_eq!(position.0, expected_index);
|
||||||
|
assert_eq!(token_data, file.buffer[expected_index]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn line_token_kinds_and_lexemes<'src>(
|
||||||
|
file: &TokenizedFile<'src>,
|
||||||
|
line_number: usize,
|
||||||
|
) -> Vec<(usize, Token, &'src str)> {
|
||||||
|
file.line_tokens(line_number)
|
||||||
|
.map(|(position, token_data)| (position.0, token_data.token, token_data.lexeme))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_count_counts_physical_lines_without_trailing_empty_line() {
|
||||||
|
assert_eq!(TokenizedFile::tokenize("").line_count(), 0);
|
||||||
|
assert_eq!(TokenizedFile::tokenize("a").line_count(), 1);
|
||||||
|
assert_eq!(TokenizedFile::tokenize("a\n").line_count(), 1);
|
||||||
|
assert_eq!(TokenizedFile::tokenize("a\nb\n").line_count(), 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_tokens_return_only_tokens_that_start_on_that_line() {
|
||||||
|
let source = "a/*x\ny*/b\nc";
|
||||||
|
let file = TokenizedFile::tokenize(source);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
line_token_kinds_and_lexemes(&file, 0),
|
||||||
|
vec![
|
||||||
|
(0, Token::Identifier, "a"),
|
||||||
|
(1, Token::BlockComment, "/*x\ny*/"),
|
||||||
|
]
|
||||||
|
);
|
||||||
|
|
||||||
|
// Important: the carried fragment "y*/" is NOT yielded here.
|
||||||
|
assert_eq!(
|
||||||
|
line_token_kinds_and_lexemes(&file, 1),
|
||||||
|
vec![(2, Token::Identifier, "b"), (3, Token::Newline, "\n"),]
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
line_token_kinds_and_lexemes(&file, 2),
|
||||||
|
vec![(4, Token::Identifier, "c")]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_tokens_are_empty_for_continuation_only_or_out_of_bounds_lines() {
|
||||||
|
let file = TokenizedFile::tokenize("/*a\nb*/");
|
||||||
|
|
||||||
|
assert_eq!(file.line_tokens(1).count(), 0);
|
||||||
|
assert_eq!(file.line_tokens(999).count(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn token_at_returns_token_for_valid_position_and_none_for_invalid_one() {
|
||||||
|
let file = TokenizedFile::tokenize("a + b");
|
||||||
|
|
||||||
|
assert_eq!(file.token_at(TokenPosition(0)), Some(file.buffer[0]));
|
||||||
|
assert_eq!(
|
||||||
|
file.token_at(TokenPosition(1)).map(|t| t.token),
|
||||||
|
Some(Token::Whitespace)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
file.token_at(TokenPosition(2)).map(|t| t.token),
|
||||||
|
Some(Token::Plus)
|
||||||
|
);
|
||||||
|
assert_eq!(file.token_at(TokenPosition(file.buffer.len())), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_text_omits_line_terminators_and_handles_empty_lines() {
|
||||||
|
let file = TokenizedFile::tokenize("left\n\nright");
|
||||||
|
|
||||||
|
assert_eq!(file.line_text(0).as_deref(), Some("left"));
|
||||||
|
assert_eq!(file.line_text(1).as_deref(), Some(""));
|
||||||
|
assert_eq!(file.line_text(2).as_deref(), Some("right"));
|
||||||
|
assert_eq!(file.line_text(999), None);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_text_includes_carried_fragment_on_continued_line() {
|
||||||
|
let file = TokenizedFile::tokenize("a/*x\ny*/b");
|
||||||
|
|
||||||
|
assert_eq!(file.line_text(1).as_deref(), Some("y*/b"));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn line_text_on_origin_line_of_multiline_token_uses_only_visible_part() {
|
||||||
|
let file = TokenizedFile::tokenize("a/*x\ny*/b");
|
||||||
|
|
||||||
|
assert_eq!(file.line_text(0).as_deref(), Some("a/*x"));
|
||||||
|
}
|
||||||
560
rottlib/src/lexer/token.rs
Normal file
560
rottlib/src/lexer/token.rs
Normal file
@ -0,0 +1,560 @@
|
|||||||
|
//! Token definitions for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! These are the tokens consumed by the parser and derived from [`RawToken`]s.
|
||||||
|
|
||||||
|
use super::{BraceKind, raw_lexer::RawToken};
|
||||||
|
|
||||||
|
/// Tokens consumed by the Fermented `UnrealScript` parser.
|
||||||
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
|
pub enum Token {
|
||||||
|
ExecDirective,
|
||||||
|
Keyword(Keyword),
|
||||||
|
// Primaries
|
||||||
|
FloatLiteral,
|
||||||
|
IntegerLiteral,
|
||||||
|
StringLiteral,
|
||||||
|
NameLiteral,
|
||||||
|
Identifier,
|
||||||
|
// Operations
|
||||||
|
Exponentiation,
|
||||||
|
Increment,
|
||||||
|
Decrement,
|
||||||
|
Not,
|
||||||
|
BitwiseNot,
|
||||||
|
Multiply,
|
||||||
|
Divide,
|
||||||
|
Modulo,
|
||||||
|
Plus,
|
||||||
|
Minus,
|
||||||
|
ConcatSpace,
|
||||||
|
Concat,
|
||||||
|
LeftShift,
|
||||||
|
LogicalRightShift,
|
||||||
|
RightShift,
|
||||||
|
Less,
|
||||||
|
LessEqual,
|
||||||
|
Greater,
|
||||||
|
GreaterEqual,
|
||||||
|
Equal,
|
||||||
|
NotEqual,
|
||||||
|
ApproximatelyEqual,
|
||||||
|
BitwiseAnd,
|
||||||
|
BitwiseOr,
|
||||||
|
BitwiseXor,
|
||||||
|
LogicalAnd,
|
||||||
|
LogicalXor,
|
||||||
|
LogicalOr,
|
||||||
|
Assign,
|
||||||
|
MultiplyAssign,
|
||||||
|
DivideAssign,
|
||||||
|
ModuloAssign,
|
||||||
|
PlusAssign,
|
||||||
|
MinusAssign,
|
||||||
|
ConcatAssign,
|
||||||
|
ConcatSpaceAssign,
|
||||||
|
// Delimiters
|
||||||
|
LeftParenthesis,
|
||||||
|
RightParenthesis,
|
||||||
|
LeftBrace,
|
||||||
|
CppBlock,
|
||||||
|
RightBrace,
|
||||||
|
LeftBracket,
|
||||||
|
RightBracket,
|
||||||
|
Semicolon,
|
||||||
|
Comma,
|
||||||
|
Period,
|
||||||
|
Colon,
|
||||||
|
Hash,
|
||||||
|
Question,
|
||||||
|
// Trivia
|
||||||
|
LineComment,
|
||||||
|
BlockComment,
|
||||||
|
Newline,
|
||||||
|
Whitespace,
|
||||||
|
// Technical - for representing a very wrong sequence of characters
|
||||||
|
Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<RawToken> for Token {
|
||||||
|
#![allow(clippy::too_many_lines)]
|
||||||
|
fn from(token: RawToken) -> Self {
|
||||||
|
match token {
|
||||||
|
// Non-trivial conversions
|
||||||
|
RawToken::Brace(BraceKind::Normal) => Self::LeftBrace,
|
||||||
|
RawToken::Brace(BraceKind::CppBlock) => Self::CppBlock,
|
||||||
|
// Keyword conversions
|
||||||
|
RawToken::CppText => Self::Keyword(Keyword::CppText),
|
||||||
|
RawToken::CppStruct => Self::Keyword(Keyword::CppStruct),
|
||||||
|
RawToken::Class => Self::Keyword(Keyword::Class),
|
||||||
|
RawToken::Struct => Self::Keyword(Keyword::Struct),
|
||||||
|
RawToken::Enum => Self::Keyword(Keyword::Enum),
|
||||||
|
RawToken::State => Self::Keyword(Keyword::State),
|
||||||
|
RawToken::Auto => Self::Keyword(Keyword::Auto),
|
||||||
|
RawToken::Function => Self::Keyword(Keyword::Function),
|
||||||
|
RawToken::Event => Self::Keyword(Keyword::Event),
|
||||||
|
RawToken::Delegate => Self::Keyword(Keyword::Delegate),
|
||||||
|
RawToken::Var => Self::Keyword(Keyword::Var),
|
||||||
|
RawToken::Local => Self::Keyword(Keyword::Local),
|
||||||
|
RawToken::Extends => Self::Keyword(Keyword::Extends),
|
||||||
|
RawToken::DependsOn => Self::Keyword(Keyword::DependsOn),
|
||||||
|
RawToken::Private => Self::Keyword(Keyword::Private),
|
||||||
|
RawToken::Protected => Self::Keyword(Keyword::Protected),
|
||||||
|
RawToken::Public => Self::Keyword(Keyword::Public),
|
||||||
|
RawToken::Const => Self::Keyword(Keyword::Const),
|
||||||
|
RawToken::Static => Self::Keyword(Keyword::Static),
|
||||||
|
RawToken::Native => Self::Keyword(Keyword::Native),
|
||||||
|
RawToken::Abstract => Self::Keyword(Keyword::Abstract),
|
||||||
|
RawToken::Deprecated => Self::Keyword(Keyword::Deprecated),
|
||||||
|
RawToken::SafeReplace => Self::Keyword(Keyword::SafeReplace),
|
||||||
|
RawToken::ExportStructs => Self::Keyword(Keyword::ExportStructs),
|
||||||
|
RawToken::Input => Self::Keyword(Keyword::Input),
|
||||||
|
RawToken::Final => Self::Keyword(Keyword::Final),
|
||||||
|
RawToken::Default => Self::Keyword(Keyword::Default),
|
||||||
|
RawToken::DefaultProperties => Self::Keyword(Keyword::DefaultProperties),
|
||||||
|
RawToken::Object => Self::Keyword(Keyword::Object),
|
||||||
|
RawToken::Begin => Self::Keyword(Keyword::Begin),
|
||||||
|
RawToken::End => Self::Keyword(Keyword::End),
|
||||||
|
RawToken::Optional => Self::Keyword(Keyword::Optional),
|
||||||
|
RawToken::Config => Self::Keyword(Keyword::Config),
|
||||||
|
RawToken::PerObjectConfig => Self::Keyword(Keyword::PerObjectConfig),
|
||||||
|
RawToken::GlobalConfig => Self::Keyword(Keyword::GlobalConfig),
|
||||||
|
RawToken::CollapseCategories => Self::Keyword(Keyword::CollapseCategories),
|
||||||
|
RawToken::DontCollapseCategories => Self::Keyword(Keyword::DontCollapseCategories),
|
||||||
|
RawToken::HideCategories => Self::Keyword(Keyword::HideCategories),
|
||||||
|
RawToken::ShowCategories => Self::Keyword(Keyword::ShowCategories),
|
||||||
|
RawToken::Localized => Self::Keyword(Keyword::Localized),
|
||||||
|
RawToken::Placeable => Self::Keyword(Keyword::Placeable),
|
||||||
|
RawToken::NotPlaceable => Self::Keyword(Keyword::NotPlaceable),
|
||||||
|
RawToken::Instanced => Self::Keyword(Keyword::Instanced),
|
||||||
|
RawToken::EditConst => Self::Keyword(Keyword::EditConst),
|
||||||
|
RawToken::EditConstArray => Self::Keyword(Keyword::EditConstArray),
|
||||||
|
RawToken::EditInline => Self::Keyword(Keyword::EditInline),
|
||||||
|
RawToken::EditInlineUse => Self::Keyword(Keyword::EditInlineUse),
|
||||||
|
RawToken::EditInlineNew => Self::Keyword(Keyword::EditInlineNew),
|
||||||
|
RawToken::NotEditInlineNew => Self::Keyword(Keyword::NotEditInlineNew),
|
||||||
|
RawToken::EdFindable => Self::Keyword(Keyword::EdFindable),
|
||||||
|
RawToken::EditInlineNotify => Self::Keyword(Keyword::EditInlineNotify),
|
||||||
|
RawToken::ParseConfig => Self::Keyword(Keyword::ParseConfig),
|
||||||
|
RawToken::Automated => Self::Keyword(Keyword::Automated),
|
||||||
|
RawToken::DynamicRecompile => Self::Keyword(Keyword::DynamicRecompile),
|
||||||
|
RawToken::Transient => Self::Keyword(Keyword::Transient),
|
||||||
|
RawToken::Long => Self::Keyword(Keyword::Long),
|
||||||
|
RawToken::Operator => Self::Keyword(Keyword::Operator),
|
||||||
|
RawToken::PreOperator => Self::Keyword(Keyword::PreOperator),
|
||||||
|
RawToken::PostOperator => Self::Keyword(Keyword::PostOperator),
|
||||||
|
RawToken::Simulated => Self::Keyword(Keyword::Simulated),
|
||||||
|
RawToken::Exec => Self::Keyword(Keyword::Exec),
|
||||||
|
RawToken::Latent => Self::Keyword(Keyword::Latent),
|
||||||
|
RawToken::Iterator => Self::Keyword(Keyword::Iterator),
|
||||||
|
RawToken::Out => Self::Keyword(Keyword::Out),
|
||||||
|
RawToken::Skip => Self::Keyword(Keyword::Skip),
|
||||||
|
RawToken::Singular => Self::Keyword(Keyword::Singular),
|
||||||
|
RawToken::Coerce => Self::Keyword(Keyword::Coerce),
|
||||||
|
RawToken::Assert => Self::Keyword(Keyword::Assert),
|
||||||
|
RawToken::Ignores => Self::Keyword(Keyword::Ignores),
|
||||||
|
RawToken::Within => Self::Keyword(Keyword::Within),
|
||||||
|
RawToken::Init => Self::Keyword(Keyword::Init),
|
||||||
|
RawToken::Export => Self::Keyword(Keyword::Export),
|
||||||
|
RawToken::NoExport => Self::Keyword(Keyword::NoExport),
|
||||||
|
RawToken::HideDropdown => Self::Keyword(Keyword::HideDropdown),
|
||||||
|
RawToken::Travel => Self::Keyword(Keyword::Travel),
|
||||||
|
RawToken::Cache => Self::Keyword(Keyword::Cache),
|
||||||
|
RawToken::CacheExempt => Self::Keyword(Keyword::CacheExempt),
|
||||||
|
RawToken::Reliable => Self::Keyword(Keyword::Reliable),
|
||||||
|
RawToken::Unreliable => Self::Keyword(Keyword::Unreliable),
|
||||||
|
RawToken::Replication => Self::Keyword(Keyword::Replication),
|
||||||
|
RawToken::NativeReplication => Self::Keyword(Keyword::NativeReplication),
|
||||||
|
RawToken::Goto => Self::Keyword(Keyword::Goto),
|
||||||
|
RawToken::If => Self::Keyword(Keyword::If),
|
||||||
|
RawToken::Else => Self::Keyword(Keyword::Else),
|
||||||
|
RawToken::Switch => Self::Keyword(Keyword::Switch),
|
||||||
|
RawToken::Case => Self::Keyword(Keyword::Case),
|
||||||
|
RawToken::For => Self::Keyword(Keyword::For),
|
||||||
|
RawToken::ForEach => Self::Keyword(Keyword::ForEach),
|
||||||
|
RawToken::While => Self::Keyword(Keyword::While),
|
||||||
|
RawToken::Do => Self::Keyword(Keyword::Do),
|
||||||
|
RawToken::Until => Self::Keyword(Keyword::Until),
|
||||||
|
RawToken::Break => Self::Keyword(Keyword::Break),
|
||||||
|
RawToken::Continue => Self::Keyword(Keyword::Continue),
|
||||||
|
RawToken::Return => Self::Keyword(Keyword::Return),
|
||||||
|
RawToken::Int => Self::Keyword(Keyword::Int),
|
||||||
|
RawToken::Float => Self::Keyword(Keyword::Float),
|
||||||
|
RawToken::Bool => Self::Keyword(Keyword::Bool),
|
||||||
|
RawToken::Byte => Self::Keyword(Keyword::Byte),
|
||||||
|
RawToken::String => Self::Keyword(Keyword::String),
|
||||||
|
RawToken::Array => Self::Keyword(Keyword::Array),
|
||||||
|
RawToken::Name => Self::Keyword(Keyword::Name),
|
||||||
|
RawToken::True => Self::Keyword(Keyword::True),
|
||||||
|
RawToken::False => Self::Keyword(Keyword::False),
|
||||||
|
RawToken::None => Self::Keyword(Keyword::None),
|
||||||
|
RawToken::SelfValue => Self::Keyword(Keyword::SelfValue),
|
||||||
|
RawToken::New => Self::Keyword(Keyword::New),
|
||||||
|
RawToken::Dot => Self::Keyword(Keyword::Dot),
|
||||||
|
RawToken::Cross => Self::Keyword(Keyword::Cross),
|
||||||
|
RawToken::ClockwiseFrom => Self::Keyword(Keyword::ClockwiseFrom),
|
||||||
|
// Trivial 1-to-1 conversions.
|
||||||
|
RawToken::ExecDirective => Self::ExecDirective,
|
||||||
|
RawToken::FloatLiteral => Self::FloatLiteral,
|
||||||
|
RawToken::IntegerLiteral => Self::IntegerLiteral,
|
||||||
|
RawToken::StringLiteral => Self::StringLiteral,
|
||||||
|
RawToken::NameLiteral => Self::NameLiteral,
|
||||||
|
RawToken::Identifier => Self::Identifier,
|
||||||
|
RawToken::Exponentiation => Self::Exponentiation,
|
||||||
|
RawToken::Increment => Self::Increment,
|
||||||
|
RawToken::Decrement => Self::Decrement,
|
||||||
|
RawToken::Not => Self::Not,
|
||||||
|
RawToken::BitwiseNot => Self::BitwiseNot,
|
||||||
|
RawToken::Multiply => Self::Multiply,
|
||||||
|
RawToken::Divide => Self::Divide,
|
||||||
|
RawToken::Modulo => Self::Modulo,
|
||||||
|
RawToken::Plus => Self::Plus,
|
||||||
|
RawToken::Minus => Self::Minus,
|
||||||
|
RawToken::ConcatSpace => Self::ConcatSpace,
|
||||||
|
RawToken::Concat => Self::Concat,
|
||||||
|
RawToken::LeftShift => Self::LeftShift,
|
||||||
|
RawToken::LogicalRightShift => Self::LogicalRightShift,
|
||||||
|
RawToken::RightShift => Self::RightShift,
|
||||||
|
RawToken::Less => Self::Less,
|
||||||
|
RawToken::LessEqual => Self::LessEqual,
|
||||||
|
RawToken::Greater => Self::Greater,
|
||||||
|
RawToken::GreaterEqual => Self::GreaterEqual,
|
||||||
|
RawToken::Equal => Self::Equal,
|
||||||
|
RawToken::NotEqual => Self::NotEqual,
|
||||||
|
RawToken::ApproximatelyEqual => Self::ApproximatelyEqual,
|
||||||
|
RawToken::BitwiseAnd => Self::BitwiseAnd,
|
||||||
|
RawToken::BitwiseOr => Self::BitwiseOr,
|
||||||
|
RawToken::BitwiseXor => Self::BitwiseXor,
|
||||||
|
RawToken::LogicalAnd => Self::LogicalAnd,
|
||||||
|
RawToken::LogicalXor => Self::LogicalXor,
|
||||||
|
RawToken::LogicalOr => Self::LogicalOr,
|
||||||
|
RawToken::Assign => Self::Assign,
|
||||||
|
RawToken::MultiplyAssign => Self::MultiplyAssign,
|
||||||
|
RawToken::DivideAssign => Self::DivideAssign,
|
||||||
|
RawToken::ModuloAssign => Self::ModuloAssign,
|
||||||
|
RawToken::PlusAssign => Self::PlusAssign,
|
||||||
|
RawToken::MinusAssign => Self::MinusAssign,
|
||||||
|
RawToken::ConcatAssign => Self::ConcatAssign,
|
||||||
|
RawToken::ConcatSpaceAssign => Self::ConcatSpaceAssign,
|
||||||
|
RawToken::LeftParenthesis => Self::LeftParenthesis,
|
||||||
|
RawToken::RightParenthesis => Self::RightParenthesis,
|
||||||
|
RawToken::RightBrace => Self::RightBrace,
|
||||||
|
RawToken::LeftBracket => Self::LeftBracket,
|
||||||
|
RawToken::RightBracket => Self::RightBracket,
|
||||||
|
RawToken::Semicolon => Self::Semicolon,
|
||||||
|
RawToken::Comma => Self::Comma,
|
||||||
|
RawToken::Period => Self::Period,
|
||||||
|
RawToken::Colon => Self::Colon,
|
||||||
|
RawToken::Hash => Self::Hash,
|
||||||
|
RawToken::Question => Self::Question,
|
||||||
|
RawToken::LineComment => Self::LineComment,
|
||||||
|
RawToken::BlockComment => Self::BlockComment,
|
||||||
|
RawToken::Newline => Self::Newline,
|
||||||
|
RawToken::Whitespace => Self::Whitespace,
|
||||||
|
RawToken::Error => Self::Error,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Token {
|
||||||
|
/// Returns `true` if this token is a newline.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_newline(&self) -> bool {
|
||||||
|
matches!(self, Self::Newline)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this token is trivia whitespace.
|
||||||
|
///
|
||||||
|
/// Note: comments are **not** considered whitespace.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_whitespace(&self) -> bool {
|
||||||
|
matches!(self, Self::Whitespace | Self::Newline)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this token may span multiple physical lines
|
||||||
|
/// (i.e. can contain newline characters).
|
||||||
|
#[must_use]
|
||||||
|
pub const fn can_span_lines(&self) -> bool {
|
||||||
|
matches!(self, Self::BlockComment | Self::CppBlock | Self::Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this token can appear in type position
|
||||||
|
/// (either a built-in type keyword or an identifier).
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_valid_type_name(&self) -> bool {
|
||||||
|
let Self::Keyword(keyword) = self else {
|
||||||
|
return *self == Self::Identifier;
|
||||||
|
};
|
||||||
|
keyword.is_valid_type_name()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this token can be used as an identifier.
|
||||||
|
///
|
||||||
|
/// This includes [`Token::Identifier`] and certain keywords that
|
||||||
|
/// `UnrealScript` also accepts in identifier position.
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_valid_identifier_name(&self) -> bool {
|
||||||
|
if *self == Self::Identifier {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if let Self::Keyword(keyword) = self {
|
||||||
|
return keyword.is_valid_identifier_name();
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this token can be used as function's modifier.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_valid_function_modifier(&self) -> bool {
|
||||||
|
let Self::Keyword(keyword) = self else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
matches!(
|
||||||
|
keyword,
|
||||||
|
Keyword::Final
|
||||||
|
| Keyword::Native
|
||||||
|
| Keyword::Abstract
|
||||||
|
| Keyword::Transient
|
||||||
|
| Keyword::Public
|
||||||
|
| Keyword::Protected
|
||||||
|
| Keyword::Private
|
||||||
|
| Keyword::Static
|
||||||
|
| Keyword::Const
|
||||||
|
| Keyword::Deprecated
|
||||||
|
| Keyword::NoExport
|
||||||
|
| Keyword::Export
|
||||||
|
| Keyword::Simulated
|
||||||
|
| Keyword::Latent
|
||||||
|
| Keyword::Iterator
|
||||||
|
| Keyword::Singular
|
||||||
|
| Keyword::Reliable
|
||||||
|
| Keyword::Unreliable
|
||||||
|
| Keyword::NativeReplication
|
||||||
|
| Keyword::PreOperator
|
||||||
|
| Keyword::Operator
|
||||||
|
| Keyword::PostOperator
|
||||||
|
| Keyword::Config
|
||||||
|
| Keyword::Exec
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reserved words of Fermented `UnrealScript`.
|
||||||
|
///
|
||||||
|
/// These are represented in [`Token`] as [`Token::Keyword`].
|
||||||
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
|
pub enum Keyword {
|
||||||
|
// C++ blocks
|
||||||
|
CppText,
|
||||||
|
CppStruct,
|
||||||
|
// Top-level declaration
|
||||||
|
Class,
|
||||||
|
Struct,
|
||||||
|
Enum,
|
||||||
|
State,
|
||||||
|
Auto,
|
||||||
|
Function,
|
||||||
|
Event,
|
||||||
|
Delegate,
|
||||||
|
Var,
|
||||||
|
Local,
|
||||||
|
// Class modifiers
|
||||||
|
Extends,
|
||||||
|
DependsOn,
|
||||||
|
// Access modifiers
|
||||||
|
Private,
|
||||||
|
Protected,
|
||||||
|
Public,
|
||||||
|
Const,
|
||||||
|
// Meta data / specifiers
|
||||||
|
Static,
|
||||||
|
Native,
|
||||||
|
Abstract,
|
||||||
|
Deprecated,
|
||||||
|
SafeReplace,
|
||||||
|
ExportStructs,
|
||||||
|
Input,
|
||||||
|
Final,
|
||||||
|
Default,
|
||||||
|
DefaultProperties,
|
||||||
|
Object,
|
||||||
|
Begin,
|
||||||
|
End,
|
||||||
|
Optional,
|
||||||
|
Config,
|
||||||
|
PerObjectConfig,
|
||||||
|
GlobalConfig,
|
||||||
|
CollapseCategories,
|
||||||
|
DontCollapseCategories,
|
||||||
|
HideCategories,
|
||||||
|
ShowCategories,
|
||||||
|
Localized,
|
||||||
|
Placeable,
|
||||||
|
NotPlaceable,
|
||||||
|
Instanced,
|
||||||
|
EditConst,
|
||||||
|
EditConstArray,
|
||||||
|
EditInline,
|
||||||
|
EditInlineUse,
|
||||||
|
EditInlineNew,
|
||||||
|
NotEditInlineNew,
|
||||||
|
EdFindable,
|
||||||
|
EditInlineNotify,
|
||||||
|
ParseConfig,
|
||||||
|
Automated,
|
||||||
|
DynamicRecompile,
|
||||||
|
Transient,
|
||||||
|
Long,
|
||||||
|
Operator,
|
||||||
|
PreOperator,
|
||||||
|
PostOperator,
|
||||||
|
Simulated,
|
||||||
|
Exec,
|
||||||
|
Latent,
|
||||||
|
Iterator,
|
||||||
|
Out,
|
||||||
|
Skip,
|
||||||
|
Singular,
|
||||||
|
Coerce,
|
||||||
|
Assert,
|
||||||
|
Ignores,
|
||||||
|
Within,
|
||||||
|
Init,
|
||||||
|
Export,
|
||||||
|
NoExport,
|
||||||
|
HideDropdown,
|
||||||
|
Travel,
|
||||||
|
Cache,
|
||||||
|
CacheExempt,
|
||||||
|
// Replication
|
||||||
|
Reliable,
|
||||||
|
Unreliable,
|
||||||
|
Replication,
|
||||||
|
NativeReplication,
|
||||||
|
// Control flow
|
||||||
|
Goto,
|
||||||
|
If,
|
||||||
|
Else,
|
||||||
|
Switch,
|
||||||
|
Case,
|
||||||
|
For,
|
||||||
|
ForEach,
|
||||||
|
While,
|
||||||
|
Do,
|
||||||
|
Until,
|
||||||
|
Break,
|
||||||
|
Continue,
|
||||||
|
Return,
|
||||||
|
// Built-in types
|
||||||
|
Int,
|
||||||
|
Float,
|
||||||
|
Bool,
|
||||||
|
Byte,
|
||||||
|
String,
|
||||||
|
Array,
|
||||||
|
Name,
|
||||||
|
// Literals
|
||||||
|
True,
|
||||||
|
False,
|
||||||
|
None,
|
||||||
|
SelfValue,
|
||||||
|
New,
|
||||||
|
// Vector math operators
|
||||||
|
Dot,
|
||||||
|
Cross,
|
||||||
|
ClockwiseFrom,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Keyword {
|
||||||
|
/// Returns `true` if this keyword can be used as an identifier.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_valid_identifier_name(self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self,
|
||||||
|
// Built-in type words usable as identifiers
|
||||||
|
Self::Name
|
||||||
|
| Self::String
|
||||||
|
| Self::Byte
|
||||||
|
| Self::Int
|
||||||
|
| Self::Bool
|
||||||
|
| Self::Float
|
||||||
|
| Self::Array
|
||||||
|
| Self::Delegate
|
||||||
|
// Context keywords we've directly checked
|
||||||
|
| Self::Class
|
||||||
|
| Self::SelfValue
|
||||||
|
| Self::Default
|
||||||
|
| Self::Static
|
||||||
|
| Self::Simulated
|
||||||
|
| Self::Native
|
||||||
|
| Self::Latent
|
||||||
|
| Self::Iterator
|
||||||
|
| Self::Singular
|
||||||
|
| Self::Reliable
|
||||||
|
| Self::Unreliable
|
||||||
|
| Self::Transient
|
||||||
|
| Self::Const
|
||||||
|
| Self::Abstract
|
||||||
|
| Self::New
|
||||||
|
| Self::Extends
|
||||||
|
| Self::Within
|
||||||
|
| Self::Config
|
||||||
|
| Self::Out
|
||||||
|
| Self::Optional
|
||||||
|
| Self::Local
|
||||||
|
| Self::Var
|
||||||
|
| Self::DefaultProperties
|
||||||
|
| Self::PerObjectConfig
|
||||||
|
| Self::Object
|
||||||
|
| Self::Enum
|
||||||
|
| Self::End
|
||||||
|
| Self::Event
|
||||||
|
| Self::Switch
|
||||||
|
| Self::Goto
|
||||||
|
| Self::Cross
|
||||||
|
| Self::CppText
|
||||||
|
| Self::CppStruct
|
||||||
|
| Self::HideCategories
|
||||||
|
| Self::Auto
|
||||||
|
| Self::For
|
||||||
|
| Self::Skip
|
||||||
|
| Self::Placeable
|
||||||
|
| Self::NotPlaceable
|
||||||
|
| Self::Instanced
|
||||||
|
| Self::Function
|
||||||
|
| Self::State
|
||||||
|
| Self::Init
|
||||||
|
| Self::Export
|
||||||
|
| Self::NoExport
|
||||||
|
| Self::Dot
|
||||||
|
| Self::ClockwiseFrom
|
||||||
|
| Self::Assert
|
||||||
|
| Self::ExportStructs
|
||||||
|
| Self::SafeReplace
|
||||||
|
| Self::Input
|
||||||
|
| Self::Travel
|
||||||
|
| Self::Cache
|
||||||
|
| Self::CacheExempt
|
||||||
|
| Self::Long
|
||||||
|
| Self::Continue
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if this keyword can appear in type position.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn is_valid_type_name(self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self,
|
||||||
|
Self::Int
|
||||||
|
| Self::Float
|
||||||
|
| Self::Bool
|
||||||
|
| Self::Byte
|
||||||
|
| Self::String
|
||||||
|
| Self::Array
|
||||||
|
| Self::Name
|
||||||
|
| Self::Object
|
||||||
|
| Self::Function
|
||||||
|
| Self::State
|
||||||
|
| Self::Delegate
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,48 +1,45 @@
|
|||||||
//! Cursor utilities for a token stream.
|
//! Cursor utilities for a token stream.
|
||||||
//!
|
//!
|
||||||
//! Provides memoized lookahead over significant tokens and attaches
|
//! Provides memoized lookahead over significant tokens and records trivia in
|
||||||
//! trivia to [`TriviaComponent`]. Significant tokens exclude whitespace and
|
//! [`TriviaIndexBuilder`]. Significant tokens exclude whitespace and comments;
|
||||||
//! comments; see [`crate::parser::TriviaKind`].
|
//! see [`parser::TriviaKind`].
|
||||||
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
use std::collections::VecDeque;
|
||||||
use crate::parser::trivia::TriviaComponent;
|
|
||||||
|
use crate::{
|
||||||
|
ast::AstSpan,
|
||||||
|
lexer::{self, Keyword, Token, TokenPosition},
|
||||||
|
parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder},
|
||||||
|
};
|
||||||
|
|
||||||
/// Cursor over a token stream with memoized lookahead and trivia attachment.
|
/// Cursor over a token stream with memoized lookahead and trivia attachment.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub(crate) struct CursorComponent<'src> {
|
pub(crate) struct Cursor<'file, 'src> {
|
||||||
/// Underlying token stream.
|
tokens: lexer::Tokens<'file, 'src>,
|
||||||
tokens: crate::lexer::Tokens<'src>,
|
lookahead_buffer: VecDeque<(TokenPosition, lexer::TokenData<'src>)>,
|
||||||
/// Significant-token lookahead buffer.
|
last_consumed_position: Option<TokenPosition>,
|
||||||
lookahead_buffer: std::collections::VecDeque<(TokenLocation, crate::lexer::TokenPiece<'src>)>,
|
|
||||||
/// Location of the last consumed token.
|
|
||||||
previous_location: Option<TokenLocation>,
|
|
||||||
/// Location of the last significant token.
|
|
||||||
///
|
|
||||||
/// Used to associate following trivia with the correct token.
|
|
||||||
last_significant_location: Option<TokenLocation>,
|
|
||||||
/// Scratch space for [`CursorComponent::buffer_next_significant_token`],
|
|
||||||
/// used to avoid reallocations.
|
|
||||||
trivia_buffer: Vec<crate::parser::trivia::TriviaToken<'src>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> CursorComponent<'src> {
|
impl<'file, 'src> Cursor<'file, 'src> {
|
||||||
/// Create a [`CursorComponent`] over the tokens of `file`.
|
/// Creates a [`Cursor`] over `tokenized_file`.
|
||||||
pub(crate) fn new(tokenized_file: &'src crate::lexer::TokenizedFile<'src>) -> Self {
|
pub(crate) const fn new(tokenized_file: &'file lexer::TokenizedFile<'src>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
tokens: tokenized_file.tokens(),
|
tokens: tokenized_file.iter(),
|
||||||
lookahead_buffer: std::collections::VecDeque::new(),
|
lookahead_buffer: VecDeque::new(),
|
||||||
previous_location: None,
|
last_consumed_position: None,
|
||||||
last_significant_location: None,
|
|
||||||
trivia_buffer: Vec::new(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ensure the lookahead buffer contains at least `lookahead + 1`
|
/// Ensures that the lookahead buffer contains at least `lookahead + 1`
|
||||||
/// significant tokens.
|
/// significant tokens, if available.
|
||||||
///
|
///
|
||||||
/// May consume trivia from the underlying stream.
|
/// May consume trivia from the underlying stream without consuming
|
||||||
/// Does not consume significant tokens.
|
/// significant tokens.
|
||||||
fn ensure_min_lookahead(&mut self, lookahead: usize, trivia: &mut TriviaComponent<'src>) {
|
fn ensure_lookahead_available(
|
||||||
|
&mut self,
|
||||||
|
lookahead: usize,
|
||||||
|
trivia: &mut TriviaIndexBuilder<'src>,
|
||||||
|
) {
|
||||||
while self.lookahead_buffer.len() <= lookahead {
|
while self.lookahead_buffer.len() <= lookahead {
|
||||||
if !self.buffer_next_significant_token(trivia) {
|
if !self.buffer_next_significant_token(trivia) {
|
||||||
break;
|
break;
|
||||||
@ -50,181 +47,320 @@ impl<'src> CursorComponent<'src> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Scan to the next significant token, recording intervening trivia.
|
/// Buffers the next significant token and records any preceding trivia.
|
||||||
///
|
///
|
||||||
/// Returns `true` if a significant token was buffered,
|
/// Returns `true` if a significant token was buffered, or `false` if the
|
||||||
/// `false` on end of file.
|
/// stream is exhausted.
|
||||||
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaComponent<'src>) -> bool {
|
fn buffer_next_significant_token(&mut self, trivia: &mut TriviaIndexBuilder<'src>) -> bool {
|
||||||
self.trivia_buffer.clear();
|
for (token_position, token_data) in self.tokens.by_ref() {
|
||||||
while let Some((token_location, token_piece)) = self.tokens.next() {
|
if let Ok(trivia_kind) = parser::TriviaKind::try_from(token_data.token) {
|
||||||
if let Ok(trivia_kind) = crate::parser::TriviaKind::try_from(token_piece.token) {
|
trivia.record_trivia(parser::TriviaToken {
|
||||||
self.trivia_buffer.push(crate::parser::TriviaToken {
|
|
||||||
kind: trivia_kind,
|
kind: trivia_kind,
|
||||||
text: token_piece.lexeme,
|
text: token_data.lexeme,
|
||||||
location: token_location,
|
position: token_position,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Attach trivia found after the previous significant token
|
trivia.record_significant_token(token_position);
|
||||||
if !self.trivia_buffer.is_empty() {
|
|
||||||
trivia.record_between_locations(
|
|
||||||
self.last_significant_location,
|
|
||||||
token_location,
|
|
||||||
&mut self.trivia_buffer,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
self.lookahead_buffer
|
self.lookahead_buffer
|
||||||
.push_back((token_location, token_piece));
|
.push_back((token_position, token_data));
|
||||||
self.last_significant_location = Some(token_location);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Reached end-of-file: attach trailing trivia
|
|
||||||
if !self.trivia_buffer.is_empty() {
|
|
||||||
trivia.record_between_locations(
|
|
||||||
self.last_significant_location,
|
|
||||||
TokenLocation::EndOfFile,
|
|
||||||
&mut self.trivia_buffer,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
/// Returns the next token without consuming it.
|
fn peek_buffered_token(&mut self) -> Option<&(TokenPosition, lexer::TokenData<'src>)> {
|
||||||
|
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
|
||||||
|
self.cursor.lookahead_buffer.front()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the next significant token without consuming it.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
///
|
///
|
||||||
/// Returns [`None`] if no tokens remain.
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn peek_token(&mut self) -> Option<Token> {
|
pub(crate) fn peek_token(&mut self) -> Option<Token> {
|
||||||
self.peek_entry().map(|(_, token_piece)| token_piece.token)
|
self.peek_buffered_token()
|
||||||
|
.map(|(_, token_data)| token_data.token)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the next token, its lexeme, and its location
|
/// Returns the next keyword without consuming it.
|
||||||
/// without consuming it.
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if no tokens remain or if the next token is not
|
||||||
|
/// a keyword.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn peek_keyword(&mut self) -> Option<Keyword> {
|
||||||
|
match self.peek_token() {
|
||||||
|
Some(Token::Keyword(keyword)) => Some(keyword),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the position of the next significant token without consuming it.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
///
|
///
|
||||||
/// Returns [`None`] if no tokens remain.
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn peek_token_lexeme_and_location(
|
pub(crate) fn peek_position(&mut self) -> Option<TokenPosition> {
|
||||||
&mut self,
|
self.peek_buffered_token()
|
||||||
) -> Option<(Token, &'src str, TokenLocation)> {
|
.map(|(token_position, _)| *token_position)
|
||||||
self.peek_entry().map(|(token_location, token_piece)| {
|
|
||||||
(token_piece.token, token_piece.lexeme, *token_location)
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the next token and its lexeme without consuming it.
|
/// Returns the next significant token and its lexeme without consuming it.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
///
|
///
|
||||||
/// Returns [`None`] if no tokens remain.
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
|
pub(crate) fn peek_token_and_lexeme(&mut self) -> Option<(Token, &'src str)> {
|
||||||
self.peek_entry()
|
self.peek_buffered_token()
|
||||||
.map(|(_, token_piece)| (token_piece.token, token_piece.lexeme))
|
.map(|(_, token_data)| (token_data.token, token_data.lexeme))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the next token and its location without consuming it.
|
/// Returns the next significant token and its position without consuming
|
||||||
|
/// it.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
///
|
///
|
||||||
/// Returns [`None`] if no tokens remain.
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn peek_token_and_location(&mut self) -> Option<(Token, TokenLocation)> {
|
pub(crate) fn peek_token_and_position(&mut self) -> Option<(Token, TokenPosition)> {
|
||||||
self.peek_entry()
|
self.peek_buffered_token()
|
||||||
.map(|(token_location, token_piece)| (token_piece.token, *token_location))
|
.map(|(token_position, token_data)| (token_data.token, *token_position))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the location of the next token, or [`TokenLocation::EndOfFile`]
|
/// Returns the next keyword and its position without consuming it.
|
||||||
/// if none remain.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn peek_location(&mut self) -> TokenLocation {
|
|
||||||
self.peek_entry()
|
|
||||||
.map(|(token_location, _)| *token_location)
|
|
||||||
.unwrap_or(TokenLocation::EndOfFile)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the location of the last token that was actually consumed
|
|
||||||
/// by [`crate::parser::Parser::advance`].
|
|
||||||
///
|
///
|
||||||
/// Returns [`None`] if no tokens have been consumed yet.
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if next token isn't keyword or no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn last_consumed_location(&self) -> Option<TokenLocation> {
|
pub(crate) fn peek_keyword_and_position(&mut self) -> Option<(Keyword, TokenPosition)> {
|
||||||
self.cursor.previous_location
|
let Some((Token::Keyword(keyword), keyword_position)) = self.peek_token_and_position()
|
||||||
|
else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
Some((keyword, keyword_position))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the most recent location the parser is "at".
|
/// Returns the next significant token, its lexeme, and its position
|
||||||
|
/// without consuming them.
|
||||||
///
|
///
|
||||||
/// If at least one token has been consumed, this is the location of the
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
/// last consumed token. Otherwise it falls back to the location of the
|
/// consume any significant token.
|
||||||
/// first significant token in the stream (or [`TokenLocation::EndOfFile`]
|
///
|
||||||
/// if the stream is empty).
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn last_visited_location(&mut self) -> TokenLocation {
|
pub(crate) fn peek_token_lexeme_and_position(
|
||||||
// Only has to `unwrap` before *any* characters were consumed
|
&mut self,
|
||||||
self.last_consumed_location()
|
) -> Option<(Token, &'src str, TokenPosition)> {
|
||||||
.unwrap_or_else(|| self.peek_location())
|
self.peek_buffered_token()
|
||||||
|
.map(|(token_position, token_data)| {
|
||||||
|
(token_data.token, token_data.lexeme, *token_position)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peeks the token at `lookahead` (`0` is the next token)
|
/// Returns the next significant token at `lookahead` without consuming it.
|
||||||
/// without consuming.
|
|
||||||
///
|
///
|
||||||
/// Returns `None` if the stream ends before that position.
|
/// `lookahead` counts significant tokens, with `0` referring to the next
|
||||||
|
/// significant token.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if no tokens remain.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
|
pub(crate) fn peek_token_at(&mut self, lookahead: usize) -> Option<Token> {
|
||||||
self.cursor
|
self.cursor
|
||||||
.ensure_min_lookahead(lookahead, &mut self.trivia);
|
.ensure_lookahead_available(lookahead, &mut self.trivia);
|
||||||
self.cursor
|
self.cursor
|
||||||
.lookahead_buffer
|
.lookahead_buffer
|
||||||
.get(lookahead)
|
.get(lookahead)
|
||||||
.map(|(_, token_piece)| token_piece.token)
|
.map(|(_, token_data)| token_data.token)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the keyword at `lookahead` without consuming it.
|
||||||
|
///
|
||||||
|
/// `lookahead` counts significant tokens, with `0` referring to the next
|
||||||
|
/// significant token.
|
||||||
|
///
|
||||||
|
/// May buffer additional tokens and record skipped trivia, but does not
|
||||||
|
/// consume any significant token.
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if the token at that position is not a keyword or if
|
||||||
|
/// the stream ends before that position.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn peek_keyword_at(&mut self, lookahead: usize) -> Option<Keyword> {
|
||||||
|
match self.peek_token_at(lookahead) {
|
||||||
|
Some(Token::Keyword(keyword)) => Some(keyword),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the position of the next significant token without consuming it.
|
||||||
|
///
|
||||||
|
/// Generates an error with `error_kind` if no tokens remain.
|
||||||
|
pub(crate) fn require_position(
|
||||||
|
&mut self,
|
||||||
|
error_kind: parser::ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
||||||
|
self.peek_position()
|
||||||
|
.ok_or_else(|| self.make_error_here(error_kind))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the next significant token and its position without consuming
|
||||||
|
/// it.
|
||||||
|
///
|
||||||
|
/// Generates an error with `error_kind` if no tokens remain.
|
||||||
|
pub(crate) fn require_token_and_position(
|
||||||
|
&mut self,
|
||||||
|
error_kind: parser::ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, (Token, TokenPosition)> {
|
||||||
|
self.peek_token_and_position()
|
||||||
|
.ok_or_else(|| self.make_error_here(error_kind))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the next significant token, its lexeme, and its position
|
||||||
|
/// without consuming them.
|
||||||
|
///
|
||||||
|
/// Generates an error with `error_kind` if no tokens remain.
|
||||||
|
pub(crate) fn require_token_lexeme_and_position(
|
||||||
|
&mut self,
|
||||||
|
error_kind: parser::ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> {
|
||||||
|
self.peek_token_lexeme_and_position()
|
||||||
|
.ok_or_else(|| self.make_error_here(error_kind))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advances by one significant token.
|
/// Advances by one significant token.
|
||||||
///
|
///
|
||||||
/// Trivia is internally handled and recorded.
|
/// Records any skipped trivia and returns the consumed token position.
|
||||||
/// Does nothing at the end-of-file.
|
/// Returns [`None`] if no significant tokens remain.
|
||||||
pub(crate) fn advance(&mut self) {
|
pub(crate) fn advance(&mut self) -> Option<TokenPosition> {
|
||||||
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
|
self.cursor.ensure_lookahead_available(0, &mut self.trivia);
|
||||||
if let Some((location, _)) = self.cursor.lookahead_buffer.pop_front() {
|
if let Some((token_position, _)) = self.cursor.lookahead_buffer.pop_front() {
|
||||||
self.cursor.previous_location = Some(location);
|
self.cursor.last_consumed_position = Some(token_position);
|
||||||
|
Some(token_position)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// If the next token equals `token`, consumes it and returns `true`.
|
/// If the next significant token equals `token`, consumes it and
|
||||||
|
/// returns `true`.
|
||||||
///
|
///
|
||||||
/// Otherwise leaves the cursor unchanged and returns `false`.
|
/// Otherwise leaves the cursor unchanged and returns `false`.
|
||||||
/// Trivia is recorded automatically.
|
#[must_use]
|
||||||
pub(crate) fn eat(&mut self, token: Token) -> bool {
|
pub(crate) fn eat(&mut self, token: Token) -> bool {
|
||||||
let correct_token = self.peek_token() == Some(token);
|
if self.peek_token() == Some(token) {
|
||||||
if correct_token {
|
|
||||||
self.advance();
|
self.advance();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
}
|
}
|
||||||
correct_token
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Centralized peek used by public peekers.
|
/// If the next significant token corresponds to the given keyword,
|
||||||
fn peek_entry(&mut self) -> Option<&(TokenLocation, crate::lexer::TokenPiece<'src>)> {
|
/// consumes it and returns `true`.
|
||||||
self.cursor.ensure_min_lookahead(0, &mut self.trivia);
|
|
||||||
self.cursor.lookahead_buffer.front()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Expects `expected` at the current position.
|
|
||||||
///
|
///
|
||||||
/// On match consumes the token and returns its [`TokenLocation`].
|
/// Otherwise leaves the cursor unchanged and returns `false`.
|
||||||
/// Otherwise returns a [`crate::parser::ParseError`] of
|
#[must_use]
|
||||||
/// the given [`crate::parser::ParseErrorKind`] that carries the current
|
pub(crate) fn eat_keyword(&mut self, keyword: Keyword) -> bool {
|
||||||
/// span for diagnostics.
|
self.eat(Token::Keyword(keyword))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects `expected` token as the next significant one.
|
||||||
|
///
|
||||||
|
/// On match consumes the token and returns its [`TokenPosition`].
|
||||||
|
/// Otherwise returns an error of `error_kind` anchored at
|
||||||
|
/// the current token, or at the last consumed token if the stream is
|
||||||
|
/// exhausted. That error also gets set a blame span that contains exactly
|
||||||
|
/// that anchor point.
|
||||||
pub(crate) fn expect(
|
pub(crate) fn expect(
|
||||||
&mut self,
|
&mut self,
|
||||||
expected: Token,
|
expected: Token,
|
||||||
error_kind: crate::parser::ParseErrorKind,
|
error_kind: parser::ParseErrorKind,
|
||||||
) -> crate::parser::ParseResult<'src, 'arena, TokenLocation> {
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
||||||
let token_position = self.peek_location();
|
// Anchors EOF diagnostics at the last consumed token
|
||||||
// `Token` only includes type information, so comparison is valid
|
// when no current token exists.
|
||||||
|
let anchor = self
|
||||||
|
.peek_position()
|
||||||
|
.unwrap_or_else(|| self.last_consumed_position_or_start());
|
||||||
|
// `Token` equality is enough here because lexeme and position
|
||||||
|
// are stored separately.
|
||||||
if self.peek_token() == Some(expected) {
|
if self.peek_token() == Some(expected) {
|
||||||
self.advance();
|
self.advance();
|
||||||
Ok(token_position)
|
Ok(anchor)
|
||||||
} else {
|
} else {
|
||||||
Err(crate::parser::ParseError {
|
Err(self
|
||||||
kind: error_kind,
|
.make_error_at(error_kind, anchor)
|
||||||
source_span: crate::ast::AstSpan::new(token_position),
|
.blame(AstSpan::new(anchor)))
|
||||||
})
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expects `expected` keyword as the next significant token.
|
||||||
|
///
|
||||||
|
/// On match consumes the keyword and returns its [`TokenPosition`].
|
||||||
|
/// Otherwise returns an error of `error_kind` anchored at the current
|
||||||
|
/// token, or at the last consumed token if the stream is exhausted.
|
||||||
|
pub(crate) fn expect_keyword(
|
||||||
|
&mut self,
|
||||||
|
expected: Keyword,
|
||||||
|
error_kind: parser::ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, TokenPosition> {
|
||||||
|
self.expect(Token::Keyword(expected), error_kind)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns position of the last significant token that was actually
|
||||||
|
/// consumed by [`parser::Parser::advance`].
|
||||||
|
///
|
||||||
|
/// Returns [`None`] if no tokens have been consumed yet.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) const fn last_consumed_position(&self) -> Option<TokenPosition> {
|
||||||
|
self.cursor.last_consumed_position
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the position of the last significant token consumed by
|
||||||
|
/// [`parser::Parser::advance`], or the start of the stream if no token has
|
||||||
|
/// been consumed yet.
|
||||||
|
///
|
||||||
|
/// Useful when diagnostics need a stable anchor even at the beginning of
|
||||||
|
/// input.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn last_consumed_position_or_start(&self) -> TokenPosition {
|
||||||
|
self.cursor
|
||||||
|
.last_consumed_position
|
||||||
|
.unwrap_or(TokenPosition(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ensures that parsing has advanced past `old_position`.
|
||||||
|
///
|
||||||
|
/// This is intended as a safeguard against infinite-loop bugs while
|
||||||
|
/// recovering from invalid input. In debug builds it asserts that progress
|
||||||
|
/// was made; in release builds it consumes one significant token when
|
||||||
|
/// the parser stalls.
|
||||||
|
#[track_caller]
|
||||||
|
pub(crate) fn ensure_forward_progress(&mut self, old_position: TokenPosition) {
|
||||||
|
if let Some(peeked_position) = self.peek_position() {
|
||||||
|
debug_assert!(
|
||||||
|
peeked_position > old_position,
|
||||||
|
"parser made no forward progress"
|
||||||
|
);
|
||||||
|
if peeked_position <= old_position {
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
//! Submodule with parsing related errors.
|
//! Submodule with parsing related errors.
|
||||||
|
|
||||||
use crate::ast::AstSpan;
|
use crate::{ast::AstSpan, lexer::TokenPosition};
|
||||||
|
|
||||||
/// Internal parse error kinds.
|
/// Internal parse error kinds.
|
||||||
///
|
///
|
||||||
@ -14,13 +14,89 @@ use crate::ast::AstSpan;
|
|||||||
/// `UnexpectedToken`, `MultipleDefaults`, etc.).
|
/// `UnexpectedToken`, `MultipleDefaults`, etc.).
|
||||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
pub enum ParseErrorKind {
|
pub enum ParseErrorKind {
|
||||||
|
// ================== New errors that are 100% used! ==================
|
||||||
|
// headline: empty parenthesized expression
|
||||||
|
// primary label on ): expected an expression before this \)'`
|
||||||
|
// secondary label on (: parenthesized expression starts here
|
||||||
|
// Remove the parentheses or put an expression inside them.
|
||||||
|
ParenthesizedExpressionEmpty {
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: missing type argument in \class<...>``
|
||||||
|
// primary label on > or insertion site: expected a type name here
|
||||||
|
// secondary label on < or on class: type argument list starts here
|
||||||
|
// help: Write a type name, for example \class<Pawn>`.`
|
||||||
|
ClassTypeMissingTypeArgument {
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: missing closing \>` in `class<...>``
|
||||||
|
// primary label on offending following token or EOF: expected \>` before this token` or at EOF: expected \>` here`
|
||||||
|
// secondary label on <: this \<` starts the type argument`
|
||||||
|
// help: Add \>` to close the class type expression.`
|
||||||
|
ClassTypeMissingClosingAngleBracket {
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: missing closing \)'`
|
||||||
|
// primary label on the point where ) was expected: expected \)' here` or, if you have a real token there, expected \)' before this token`
|
||||||
|
// secondary label on the opening (: this \(` starts the parenthesized expression`
|
||||||
|
// help: Add \)' to close the expression.`
|
||||||
|
ParenthesizedExpressionMissingClosingParenthesis {
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: expected expression
|
||||||
|
// primary label: this token cannot start an expression
|
||||||
|
// optional help: Expressions can start with literals, identifiers, \(`, `{`, or expression keywords.`
|
||||||
|
ExpressionExpected,
|
||||||
|
// headline: invalid type argument in \class<...>``
|
||||||
|
// primary label on the bad token inside the angle brackets: expected a qualified type name here
|
||||||
|
// secondary label on class or <: while parsing this class type expression
|
||||||
|
// note: Only a type name is accepted between \<` and `>` here.`
|
||||||
|
ClassTypeInvalidTypeArgument {
|
||||||
|
left_angle_bracket_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: too many arguments in \new(...)``
|
||||||
|
// primary label on the fourth argument, or on the comma before it if that is easier: unexpected extra argument
|
||||||
|
// secondary label on the opening (: this argument list accepts at most three arguments
|
||||||
|
// note: The three slots are \outer`, `name`, and `flags`.`
|
||||||
|
// help: Remove the extra argument.
|
||||||
|
NewTooManyArguments {
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// headline: missing closing \)' in `new(...)``
|
||||||
|
// primary label: expected \)' here`
|
||||||
|
// secondary label on the opening (: this argument list starts here
|
||||||
|
// help: Add \)' to close the argument list.`
|
||||||
|
NewMissingClosingParenthesis {
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// missing class specifier in \new` expression`
|
||||||
|
// Primary label on the first token where a class specifier should have started: expected a class specifier here
|
||||||
|
// Secondary label on new: \new` expression starts here` If there was an argument list, an additional secondary on ( is also reasonable: optional \new(...)` arguments end here`
|
||||||
|
// Help: Add the class or expression to instantiate after \new` or `new(...)`.`
|
||||||
|
NewMissingClassSpecifier {
|
||||||
|
new_keyword_position: TokenPosition,
|
||||||
|
},
|
||||||
|
// ================== Old errors to be thrown away! ==================
|
||||||
/// Expression inside `(...)` could not be parsed and no closing `)`
|
/// Expression inside `(...)` could not be parsed and no closing `)`
|
||||||
/// was found.
|
/// was found.
|
||||||
ExpressionMissingClosingParenthesis,
|
FunctionCallMissingClosingParenthesis,
|
||||||
/// A `do` block was not followed by a matching `until`.
|
/// A `do` block was not followed by a matching `until`.
|
||||||
DoMissingUntil,
|
DoMissingUntil,
|
||||||
/// Found an unexpected token while parsing an expression.
|
/// Found an unexpected token while parsing an expression.
|
||||||
ExpressionUnexpectedToken,
|
ExpressionUnexpectedToken,
|
||||||
|
DeclEmptyVariableDeclarations,
|
||||||
|
DeclNoSeparatorBetweenVariableDeclarations,
|
||||||
|
DeclExpectedRightBracketAfterArraySize,
|
||||||
|
DeclExpectedCommaAfterVariableDeclarator,
|
||||||
|
TypeSpecExpectedType,
|
||||||
|
TypeSpecInvalidNamedTypeName,
|
||||||
|
|
||||||
|
TypeSpecArrayMissingOpeningAngle,
|
||||||
|
TypeSpecArrayMissingInnerType,
|
||||||
|
TypeSpecArrayMissingClosingAngle,
|
||||||
|
|
||||||
|
TypeSpecClassMissingInnerType,
|
||||||
|
TypeSpecClassMissingClosingAngle,
|
||||||
/// A `for` loop is missing its opening `(`.
|
/// A `for` loop is missing its opening `(`.
|
||||||
ForMissingOpeningParenthesis,
|
ForMissingOpeningParenthesis,
|
||||||
/// The first `;` in `for (init; cond; step)` is missing.
|
/// The first `;` in `for (init; cond; step)` is missing.
|
||||||
@ -33,6 +109,7 @@ pub enum ParseErrorKind {
|
|||||||
BlockMissingSemicolonAfterExpression,
|
BlockMissingSemicolonAfterExpression,
|
||||||
/// A statement inside a block is not terminated with `;`.
|
/// A statement inside a block is not terminated with `;`.
|
||||||
BlockMissingSemicolonAfterStatement,
|
BlockMissingSemicolonAfterStatement,
|
||||||
|
BlockMissingClosingBrace,
|
||||||
/// `switch` has no body (missing matching braces).
|
/// `switch` has no body (missing matching braces).
|
||||||
SwitchMissingBody,
|
SwitchMissingBody,
|
||||||
/// The first top-level item in a `switch` body is not a `case`.
|
/// The first top-level item in a `switch` body is not a `case`.
|
||||||
@ -43,6 +120,7 @@ pub enum ParseErrorKind {
|
|||||||
SwitchDuplicateDefault,
|
SwitchDuplicateDefault,
|
||||||
/// Found `case` arms after a `default` branch.
|
/// Found `case` arms after a `default` branch.
|
||||||
SwitchCasesAfterDefault,
|
SwitchCasesAfterDefault,
|
||||||
|
SwitchMissingClosingBrace,
|
||||||
/// A `goto` was not followed by a label.
|
/// A `goto` was not followed by a label.
|
||||||
GotoMissingLabel,
|
GotoMissingLabel,
|
||||||
/// Unexpected end of input while parsing.
|
/// Unexpected end of input while parsing.
|
||||||
@ -75,6 +153,184 @@ pub enum ParseErrorKind {
|
|||||||
/// Expected one of: integer, float, string, `true`, `false`, `none`
|
/// Expected one of: integer, float, string, `true`, `false`, `none`
|
||||||
/// or an identifier.
|
/// or an identifier.
|
||||||
DeclarationLiteralUnexpectedToken,
|
DeclarationLiteralUnexpectedToken,
|
||||||
|
/// A class name was expected, but the current token is not an identifier.
|
||||||
|
///
|
||||||
|
/// Emitted when parsing `class Foo` and the token after `class` is not an
|
||||||
|
/// identifier (so its string value cannot be extracted).
|
||||||
|
ClassNameNotIdentifier,
|
||||||
|
/// A parent class name after `extends` was expected, but the token is not
|
||||||
|
/// an identifier.
|
||||||
|
///
|
||||||
|
/// Emitted when parsing `class Foo extends Bar` and the token after
|
||||||
|
/// `extends` is not an identifier.
|
||||||
|
ClassParentNameNotIdentifier,
|
||||||
|
/// A class declaration was not terminated with `;`.
|
||||||
|
///
|
||||||
|
/// Emitted when the parser reaches the end of a class definition but
|
||||||
|
/// does not encounter the required semicolon.
|
||||||
|
ClassMissingSemicolon,
|
||||||
|
/// An identifier was expected inside optional parentheses, but the token
|
||||||
|
/// is not an identifier.
|
||||||
|
///
|
||||||
|
/// Emitted by helpers that parse either `(<Ident>)` or bare `<Ident>`.
|
||||||
|
ParenthesisedIdentifierNameNotIdentifier,
|
||||||
|
/// A `(` was seen before an identifier, but the matching `)` was not found.
|
||||||
|
///
|
||||||
|
/// Emitted when parsing a parenthesised identifier like `(Foo)`.
|
||||||
|
ParenthesisedIdentifierMissingClosingParenthesis,
|
||||||
|
/// `HideCategories` is missing the opening `(` before the category list.
|
||||||
|
///
|
||||||
|
/// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`.
|
||||||
|
HideCategoriesMissingOpeningParenthesis,
|
||||||
|
/// `HideCategories` is missing the closing `)` after the category list.
|
||||||
|
HideCategoriesMissingClosingParenthesis,
|
||||||
|
/// `HideCategories` is missing the opening `(` before the category list.
|
||||||
|
///
|
||||||
|
/// Expected syntax: `HideCategories(CategoryA, CategoryB, ...)`.
|
||||||
|
ShowCategoriesMissingOpeningParenthesis,
|
||||||
|
/// `HideCategories` is missing the closing `)` after the category list.
|
||||||
|
ShowCategoriesMissingClosingParenthesis,
|
||||||
|
/// `Within` must be followed by a class or package name identifier.
|
||||||
|
///
|
||||||
|
/// Example: `Within(MyOuterClass)`.
|
||||||
|
WithinNameNotIdentifier,
|
||||||
|
/// `operator` modifier is missing the opening `(` before
|
||||||
|
/// the precedence rank.
|
||||||
|
///
|
||||||
|
/// Expected syntax: `operator(<integer>)`.
|
||||||
|
OperatorMissingOpeningParenthesis,
|
||||||
|
/// `operator(<...>)` must contain an integer literal precedence rank.
|
||||||
|
///
|
||||||
|
/// Emitted when the token inside parentheses is not an integer literal.
|
||||||
|
OperatorPrecedenceNotIntegerLiteral,
|
||||||
|
/// `operator(<integer>` is missing the closing `)`.
|
||||||
|
OperatorMissingClosingParenthesis,
|
||||||
|
ParamInvalidTypeName,
|
||||||
|
ParamMissingIdentifier,
|
||||||
|
FunctionReturnTypeNotTypeName,
|
||||||
|
FunctionNameNotIdentifier,
|
||||||
|
FunctionParamsMissingOpeningParenthesis,
|
||||||
|
FunctionParamsMissingClosingParenthesis,
|
||||||
|
ClassUnexpectedItem,
|
||||||
|
EnumMissingLeftBrace,
|
||||||
|
EnumBadVariant,
|
||||||
|
StructFieldMissingName,
|
||||||
|
StructFieldMissingSemicolon,
|
||||||
|
StructMissingRightBrace,
|
||||||
|
// Named enum/struct typedefs
|
||||||
|
EnumMissingKeyword, // class member: expected `enum`
|
||||||
|
EnumExpectedNameOrBrace, // after `enum`, expected identifier
|
||||||
|
EnumNoClosingBrace,
|
||||||
|
EnumEmptyVariants,
|
||||||
|
EnumNoSeparatorBetweenVariants,
|
||||||
|
EnumMissingLBrace,
|
||||||
|
StructMissingKeyword, // class member: expected `struct`
|
||||||
|
StructExpectedNameOrBrace, // after `struct`, expected identifier
|
||||||
|
StructExpectedExtendsOrBrace,
|
||||||
|
StructMissingLeftBrace,
|
||||||
|
StructExpectedBaseName,
|
||||||
|
StructBodyUnexpectedItem,
|
||||||
|
CppDirectiveMissingCppBlock,
|
||||||
|
|
||||||
|
// var(...) field decls
|
||||||
|
VarMissingKeyword, // class member: expected `var`
|
||||||
|
VarSpecsMissingOpeningParenthesis, // after `var`, expected '('
|
||||||
|
VarSpecNotIdentifier, // inside var(...), expected identifier
|
||||||
|
VarSpecsMissingClosingParenthesis, // var(...) missing ')'
|
||||||
|
|
||||||
|
// Generic decl end
|
||||||
|
DeclMissingSemicolon, // class-level declaration missing `;`
|
||||||
|
// --- Replication ---
|
||||||
|
ReplicationMissingReliability,
|
||||||
|
ReplicationIfMissingOpeningParenthesis,
|
||||||
|
ReplicationIfMissingClosingParenthesis,
|
||||||
|
ReplicationMemberNotIdentifier,
|
||||||
|
ReplicationMemberMissingClosingParenthesis,
|
||||||
|
ReplicationRuleMissingSemicolon,
|
||||||
|
ReplicationMissingKeyword,
|
||||||
|
ReplicationMissingLBrace,
|
||||||
|
ReplicationMissingRBrace,
|
||||||
|
|
||||||
|
// --- DefaultProperties ---
|
||||||
|
DefaultPropPathExpectedIdentifier,
|
||||||
|
DefaultPropIndexNotIntegerLiteral,
|
||||||
|
DefaultPropIndexMissingClosingParenthesis,
|
||||||
|
DefaultPropAssignMissingEq,
|
||||||
|
DefaultPropsMissingKeyword,
|
||||||
|
DefaultPropsMissingLBrace,
|
||||||
|
DefaultPropsMissingRBrace,
|
||||||
|
|
||||||
|
// --- Begin/End Object headers ---
|
||||||
|
ObjectBeginMissingKeyword,
|
||||||
|
ObjectMissingKeyword,
|
||||||
|
ObjectHeaderKeyNotIdentifier,
|
||||||
|
ObjectHeaderMissingEq,
|
||||||
|
|
||||||
|
// --- State / ignores ---
|
||||||
|
IgnoresItemNotIdentifier,
|
||||||
|
IgnoresMissingSemicolon,
|
||||||
|
StateMissingKeyword,
|
||||||
|
StateNameNotIdentifier,
|
||||||
|
StateParentNameNotIdentifier,
|
||||||
|
StateMissingLBrace,
|
||||||
|
StateMissingRBrace,
|
||||||
|
|
||||||
|
ClassMissingKeyword,
|
||||||
|
TypeMissingLT,
|
||||||
|
TypeMissingGT,
|
||||||
|
StateParensMissingRParen,
|
||||||
|
BadTypeInClassTypeDeclaration,
|
||||||
|
IdentifierExpected,
|
||||||
|
|
||||||
|
// --- Generic list diagnostics (comma-separated, closed by `)`) ---
|
||||||
|
/// Saw `)` immediately after `(`, or closed the list without any items.
|
||||||
|
/// Use when a construct requires at least one item: e.g. `HideCategories(...)`.
|
||||||
|
ListEmpty,
|
||||||
|
|
||||||
|
/// Parser was positioned where an item was required but found neither an
|
||||||
|
/// item nor a terminator. Typical triggers:
|
||||||
|
/// - Leading comma: `(, Foo)`
|
||||||
|
/// - Double comma: `(Foo,, Bar)`
|
||||||
|
/// - Garbage in place of an item: `(@@, Foo)`
|
||||||
|
///
|
||||||
|
/// Recovery: skip to next comma or `)`.
|
||||||
|
ListMissingIdentifierBeforeSeparator,
|
||||||
|
|
||||||
|
/// Parser was positioned where an item was required but found neither an
|
||||||
|
/// item nor a terminator. Typical triggers:
|
||||||
|
/// - Leading comma: `(, Foo)`
|
||||||
|
/// - Double comma: `(Foo,, Bar)`
|
||||||
|
/// - Garbage in place of an item: `(@@, Foo)`
|
||||||
|
///
|
||||||
|
/// Recovery: skip to next comma or `)`.
|
||||||
|
ListInvalidIdentifier,
|
||||||
|
|
||||||
|
/// Two items without a comma (or some token after an item where a comma
|
||||||
|
/// was required). Typical triggers:
|
||||||
|
/// - Adjacent identifiers: `(Foo Bar)`
|
||||||
|
/// - Token after an item where only `,` or `)` are valid.
|
||||||
|
///
|
||||||
|
/// Recovery: behave as if a comma were present; continue with the next item.
|
||||||
|
ListMissingSeparator,
|
||||||
|
|
||||||
|
/// Comma directly before `)`: `(Foo, )`.
|
||||||
|
/// Treat as a soft error or warning, depending on your policy.
|
||||||
|
ListTrailingSeparator,
|
||||||
|
FunctionArgumentMissingComma,
|
||||||
|
// Expression was required, but none started
|
||||||
|
MissingExpression,
|
||||||
|
MissingBranchBody,
|
||||||
|
CallableExpectedHeader,
|
||||||
|
CallableExpectedKind,
|
||||||
|
CallableOperatorInvalidPrecedence,
|
||||||
|
CallableMissingBodyOrSemicolon,
|
||||||
|
CallableNameNotIdentifier,
|
||||||
|
CallablePrefixOperatorInvalidSymbol,
|
||||||
|
CallableInfixOperatorInvalidSymbol,
|
||||||
|
CallablePostfixOperatorInvalidSymbol,
|
||||||
|
CallableParamsMissingOpeningParenthesis,
|
||||||
|
CallableParamsMissingClosingParenthesis,
|
||||||
|
NativeModifierIdNotIntegerLiteral,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Enumerates all specific kinds of parsing errors that the parser can emit.
|
/// Enumerates all specific kinds of parsing errors that the parser can emit.
|
||||||
@ -83,18 +339,32 @@ pub enum ParseErrorKind {
|
|||||||
pub struct ParseError {
|
pub struct ParseError {
|
||||||
/// The specific kind of parse error that occurred.
|
/// The specific kind of parse error that occurred.
|
||||||
pub kind: ParseErrorKind,
|
pub kind: ParseErrorKind,
|
||||||
|
pub anchor: TokenPosition,
|
||||||
|
/// Where the user should look first.
|
||||||
|
pub blame_span: AstSpan,
|
||||||
/// The source span in which the error was detected.
|
/// The source span in which the error was detected.
|
||||||
pub source_span: AstSpan,
|
pub covered_span: AstSpan,
|
||||||
|
pub related_span: Option<AstSpan>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>;
|
pub type ParseResult<'src, 'arena, T> = Result<T, ParseError>;
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
impl crate::parser::Parser<'_, '_> {
|
||||||
#[must_use]
|
pub(crate) fn make_error_here(&self, error_kind: ParseErrorKind) -> ParseError {
|
||||||
pub(crate) fn make_error_here(&mut self, error_kind: ParseErrorKind) -> ParseError {
|
self.make_error_at(error_kind, self.last_consumed_position_or_start())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn make_error_at(
|
||||||
|
&self,
|
||||||
|
error_kind: ParseErrorKind,
|
||||||
|
position: TokenPosition,
|
||||||
|
) -> ParseError {
|
||||||
ParseError {
|
ParseError {
|
||||||
kind: error_kind,
|
kind: error_kind,
|
||||||
source_span: AstSpan::new(self.peek_location()),
|
anchor: position,
|
||||||
|
blame_span: AstSpan::new(position),
|
||||||
|
covered_span: AstSpan::new(position),
|
||||||
|
related_span: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,60 +0,0 @@
|
|||||||
use crate::ast::Expression;
|
|
||||||
use crate::lexer::Token;
|
|
||||||
use crate::parser::ParseErrorKind;
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|
||||||
/// Parses a block `{ ... }` after `{`.
|
|
||||||
///
|
|
||||||
/// Consumes tokens until the matching `}` and returns
|
|
||||||
/// an [`Expression::Block`] spanning from the opening `{` to
|
|
||||||
/// the closing `}`.
|
|
||||||
/// Returns a best-effort block on premature end-of-file.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_block_cont(
|
|
||||||
&mut self,
|
|
||||||
block_start_location: crate::lexer::TokenLocation,
|
|
||||||
) -> crate::ast::ExpressionRef<'src, 'arena> {
|
|
||||||
let mut statements = self.arena.vec();
|
|
||||||
let mut tail = None;
|
|
||||||
loop {
|
|
||||||
let Some((token, token_location)) = self.peek_token_and_location() else {
|
|
||||||
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
|
|
||||||
return self.arena.alloc(
|
|
||||||
Expression::Block { statements, tail },
|
|
||||||
crate::ast::AstSpan {
|
|
||||||
from: block_start_location,
|
|
||||||
to: self.peek_location(),
|
|
||||||
},
|
|
||||||
);
|
|
||||||
};
|
|
||||||
if let Token::RightBrace = token {
|
|
||||||
self.advance(); // '}'
|
|
||||||
let block_span = crate::ast::AstSpan {
|
|
||||||
from: block_start_location,
|
|
||||||
to: token_location,
|
|
||||||
};
|
|
||||||
return self
|
|
||||||
.arena
|
|
||||||
.alloc(Expression::Block { statements, tail }, block_span);
|
|
||||||
}
|
|
||||||
// We know that at this point:
|
|
||||||
// 1. There is still a token and it is not end-of-file;
|
|
||||||
// 2. It isn't end of the block.
|
|
||||||
// So having a tail statement there is a problem!
|
|
||||||
if let Some(tail_expression) = tail {
|
|
||||||
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
|
|
||||||
let tail_span = *tail_expression.span();
|
|
||||||
let node = self.arena.alloc(
|
|
||||||
crate::ast::Statement::Expression(tail_expression),
|
|
||||||
tail_span,
|
|
||||||
);
|
|
||||||
statements.push(node);
|
|
||||||
}
|
|
||||||
tail = self.parse_block_item(&mut statements);
|
|
||||||
// Ensure forward progress under errors to avoid infinite loops.
|
|
||||||
if self.peek_location() <= token_location {
|
|
||||||
self.advance();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
959
rottlib/src/parser/grammar/class.rs
Normal file
959
rottlib/src/parser/grammar/class.rs
Normal file
@ -0,0 +1,959 @@
|
|||||||
|
// rottlib/src/parser/grammar/class.rs
|
||||||
|
|
||||||
|
#![allow(clippy::all, clippy::pedantic, clippy::nursery)]
|
||||||
|
|
||||||
|
use crate::ast::{
|
||||||
|
AstSpan, BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition,
|
||||||
|
ClassMember, ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef,
|
||||||
|
DeclarationLiteral, DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef,
|
||||||
|
IdentifierToken, Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule,
|
||||||
|
ReplicationRuleRef, StateDecl, StateDeclRef, StateModifier, VariableDeclarator,
|
||||||
|
VariableDeclaratorRef,
|
||||||
|
};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
#[inline]
|
||||||
|
pub fn ensure_progress_or_break(&mut self, before: TokenPosition) -> bool {
|
||||||
|
match self.peek_position() {
|
||||||
|
Some(position) if position > before => true,
|
||||||
|
_ => self.advance().is_some(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_exec_directive(&mut self) -> ParseResult<'src, 'arena, ExecDirectiveRef<'arena>> {
|
||||||
|
let (token, lexeme, start_position) =
|
||||||
|
self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;
|
||||||
|
debug_assert!(matches!(token, Token::ExecDirective));
|
||||||
|
|
||||||
|
let trimmed = lexeme.trim_end_matches(['\r', '\n']);
|
||||||
|
self.advance();
|
||||||
|
|
||||||
|
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
|
||||||
|
Ok(self.arena.alloc_node(
|
||||||
|
ExecDirective {
|
||||||
|
text: self.arena.string(trimmed),
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_parenthesised_identifier(&mut self) -> ParseResult<'src, 'arena, IdentifierToken> {
|
||||||
|
let has_opening_parenthesis = self.eat(Token::LeftParenthesis);
|
||||||
|
let identifier =
|
||||||
|
self.parse_identifier(ParseErrorKind::ParenthesisedIdentifierNameNotIdentifier)?;
|
||||||
|
if has_opening_parenthesis {
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
Ok(identifier)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn lookahead_state_after_mods(&mut self) -> bool {
|
||||||
|
let mut lookahead = 0;
|
||||||
|
loop {
|
||||||
|
match self.peek_keyword_at(lookahead) {
|
||||||
|
Some(Keyword::Auto | Keyword::Simulated) => {
|
||||||
|
lookahead += 1;
|
||||||
|
}
|
||||||
|
Some(Keyword::State) => return true,
|
||||||
|
_ => return false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_array_len_expr(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, Option<ExpressionRef<'src, 'arena>>> {
|
||||||
|
if !self.eat(Token::LeftBracket) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let expression = self.parse_expression();
|
||||||
|
|
||||||
|
self.expect(
|
||||||
|
Token::RightBracket,
|
||||||
|
ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
|
||||||
|
left_parenthesis_position: self.last_consumed_position_or_start(),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseBracket)?;
|
||||||
|
|
||||||
|
Ok(Some(expression))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_class_declaration_modifier(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, Option<ClassModifierRef<'arena>>> {
|
||||||
|
use ClassModifier::{
|
||||||
|
Abstract, CacheExempt, CollapseCategories, Config, Const, DependsOn, Deprecated,
|
||||||
|
DontCollapseCategories, DynamicRecompile, EditConst, EditInline, EditInlineNew, Export,
|
||||||
|
ExportStructs, Final, GlobalConfig, HideCategories, HideDropdown, Instanced, Localized,
|
||||||
|
Native, NativeReplication, NoExport, NotEditInlineNew, NotPlaceable, ParseConfig,
|
||||||
|
PerObjectConfig, Placeable, Private, Protected, Public, SafeReplace, ShowCategories,
|
||||||
|
Static, Transient, Within,
|
||||||
|
};
|
||||||
|
|
||||||
|
let Some((token, modifier_position)) = self.peek_token_and_position() else {
|
||||||
|
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut consumed_inside_match = false;
|
||||||
|
let mut span = AstSpan::new(modifier_position);
|
||||||
|
|
||||||
|
let modifier = match token {
|
||||||
|
Token::Keyword(Keyword::Final) => Final,
|
||||||
|
Token::Keyword(Keyword::Native) => Native,
|
||||||
|
Token::Keyword(Keyword::Abstract) => Abstract,
|
||||||
|
Token::Keyword(Keyword::Transient) => Transient,
|
||||||
|
Token::Keyword(Keyword::Public) => Public,
|
||||||
|
Token::Keyword(Keyword::Protected) => Protected,
|
||||||
|
Token::Keyword(Keyword::Private) => Private,
|
||||||
|
Token::Keyword(Keyword::Static) => Static,
|
||||||
|
Token::Keyword(Keyword::Const) => Const,
|
||||||
|
Token::Keyword(Keyword::Deprecated) => Deprecated,
|
||||||
|
Token::Keyword(Keyword::NoExport) => NoExport,
|
||||||
|
Token::Keyword(Keyword::Export) => Export,
|
||||||
|
|
||||||
|
Token::Keyword(Keyword::Config) => {
|
||||||
|
self.advance();
|
||||||
|
consumed_inside_match = true;
|
||||||
|
let value = if self.peek_token() == Some(Token::LeftParenthesis) {
|
||||||
|
Some(self.parse_parenthesised_identifier()?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
Config(value)
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::Keyword(Keyword::Localized) => Localized,
|
||||||
|
Token::Keyword(Keyword::Placeable) => Placeable,
|
||||||
|
Token::Keyword(Keyword::NotPlaceable) => NotPlaceable,
|
||||||
|
Token::Keyword(Keyword::Instanced) => Instanced,
|
||||||
|
Token::Keyword(Keyword::EditConst) => EditConst,
|
||||||
|
Token::Keyword(Keyword::EditInline) => EditInline,
|
||||||
|
Token::Keyword(Keyword::EditInlineNew) => EditInlineNew,
|
||||||
|
Token::Keyword(Keyword::NotEditInlineNew) => NotEditInlineNew,
|
||||||
|
Token::Keyword(Keyword::CollapseCategories) => CollapseCategories,
|
||||||
|
Token::Keyword(Keyword::DontCollapseCategories) => DontCollapseCategories,
|
||||||
|
Token::Keyword(Keyword::GlobalConfig) => GlobalConfig,
|
||||||
|
Token::Keyword(Keyword::PerObjectConfig) => PerObjectConfig,
|
||||||
|
Token::Keyword(Keyword::DynamicRecompile) => DynamicRecompile,
|
||||||
|
Token::Keyword(Keyword::CacheExempt) => CacheExempt,
|
||||||
|
Token::Keyword(Keyword::HideDropdown) => HideDropdown,
|
||||||
|
Token::Keyword(Keyword::ParseConfig) => ParseConfig,
|
||||||
|
Token::Keyword(Keyword::NativeReplication) => NativeReplication,
|
||||||
|
Token::Keyword(Keyword::ExportStructs) => ExportStructs,
|
||||||
|
Token::Keyword(Keyword::SafeReplace) => SafeReplace,
|
||||||
|
|
||||||
|
Token::Keyword(Keyword::HideCategories) => {
|
||||||
|
self.advance();
|
||||||
|
consumed_inside_match = true;
|
||||||
|
self.expect(
|
||||||
|
Token::LeftParenthesis,
|
||||||
|
ParseErrorKind::HideCategoriesMissingOpeningParenthesis,
|
||||||
|
)?;
|
||||||
|
let categories = self.parse_identifier_list();
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::HideCategoriesMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
|
||||||
|
HideCategories(categories)
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::ShowCategories) => {
|
||||||
|
self.advance();
|
||||||
|
consumed_inside_match = true;
|
||||||
|
self.expect(
|
||||||
|
Token::LeftParenthesis,
|
||||||
|
ParseErrorKind::ShowCategoriesMissingOpeningParenthesis,
|
||||||
|
)?;
|
||||||
|
let categories = self.parse_identifier_list();
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::ShowCategoriesMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
|
||||||
|
ShowCategories(categories)
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Within) => {
|
||||||
|
self.advance();
|
||||||
|
consumed_inside_match = true;
|
||||||
|
Within(self.parse_identifier(ParseErrorKind::WithinNameNotIdentifier)?)
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::DependsOn) => {
|
||||||
|
self.advance();
|
||||||
|
consumed_inside_match = true;
|
||||||
|
DependsOn(self.parse_parenthesised_identifier()?)
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => return Ok(None),
|
||||||
|
};
|
||||||
|
|
||||||
|
if !consumed_inside_match {
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
span.extend_to(self.last_consumed_position_or_start());
|
||||||
|
Ok(Some(self.arena.alloc_node(modifier, span)))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn parse_class_header_cont(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, ClassDeclaration<'arena>>
|
||||||
|
where
|
||||||
|
'src: 'arena,
|
||||||
|
{
|
||||||
|
let class_name = self.parse_identifier(ParseErrorKind::ClassNameNotIdentifier)?;
|
||||||
|
|
||||||
|
let parent_class_name = if self.eat_keyword(Keyword::Extends) {
|
||||||
|
let qualified_parent =
|
||||||
|
self.parse_qualified_identifier(ParseErrorKind::ClassParentNameNotIdentifier)?;
|
||||||
|
Some(qualified_parent)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut modifiers = Vec::new();
|
||||||
|
loop {
|
||||||
|
match self.parse_class_declaration_modifier() {
|
||||||
|
Ok(Some(next_modifier)) => modifiers.push(next_modifier),
|
||||||
|
Ok(None) => break,
|
||||||
|
Err(error) => {
|
||||||
|
self.report_error(error);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect(Token::Semicolon, ParseErrorKind::ClassMissingSemicolon)?;
|
||||||
|
Ok(ClassDeclaration {
|
||||||
|
name: class_name,
|
||||||
|
parent: parent_class_name.map(|identifier| identifier.head()),
|
||||||
|
modifiers,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_class_var_decl(&mut self) -> ParseResult<'src, 'arena, ClassVarDeclRef<'src, 'arena>> {
|
||||||
|
let start_position = self.expect(
|
||||||
|
Token::Keyword(Keyword::Var),
|
||||||
|
ParseErrorKind::VarMissingKeyword,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let paren_specs = self.parse_var_editor_specifier_list();
|
||||||
|
let modifiers = self.parse_var_declaration_modifiers();
|
||||||
|
let type_spec = self.parse_type_specifier()?;
|
||||||
|
let declarators = self.parse_class_var_declarators();
|
||||||
|
|
||||||
|
self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
|
||||||
|
|
||||||
|
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
|
||||||
|
Ok(self.arena.alloc_node(
|
||||||
|
ClassVarDecl {
|
||||||
|
paren_specs,
|
||||||
|
modifiers,
|
||||||
|
type_spec,
|
||||||
|
declarators,
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_replication_rule(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, ReplicationRuleRef<'src, 'arena>> {
|
||||||
|
let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
|
||||||
|
|
||||||
|
let reliability = match self.peek_token() {
|
||||||
|
Some(Token::Keyword(Keyword::Reliable)) => {
|
||||||
|
self.advance();
|
||||||
|
Reliability::Reliable
|
||||||
|
}
|
||||||
|
Some(Token::Keyword(Keyword::Unreliable)) => {
|
||||||
|
self.advance();
|
||||||
|
Reliability::Unreliable
|
||||||
|
}
|
||||||
|
_ => return Err(self.make_error_here(ParseErrorKind::ReplicationMissingReliability)),
|
||||||
|
};
|
||||||
|
|
||||||
|
let condition = if self.eat_keyword(Keyword::If) {
|
||||||
|
self.expect(
|
||||||
|
Token::LeftParenthesis,
|
||||||
|
ParseErrorKind::ReplicationIfMissingOpeningParenthesis,
|
||||||
|
)?;
|
||||||
|
let expression = self.parse_expression();
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::ReplicationIfMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)?;
|
||||||
|
Some(expression)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut members = self.arena.vec();
|
||||||
|
loop {
|
||||||
|
let identifier =
|
||||||
|
self.parse_identifier(ParseErrorKind::ReplicationMemberNotIdentifier)?;
|
||||||
|
members.push(identifier);
|
||||||
|
|
||||||
|
if self.eat(Token::LeftParenthesis) {
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::ReplicationMemberMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.eat(Token::Comma) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect(
|
||||||
|
Token::Semicolon,
|
||||||
|
ParseErrorKind::ReplicationRuleMissingSemicolon,
|
||||||
|
)?;
|
||||||
|
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
|
||||||
|
Ok(self.arena.alloc_node(
|
||||||
|
ReplicationRule {
|
||||||
|
reliability,
|
||||||
|
condition,
|
||||||
|
members,
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_replication_block(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, ReplicationBlockRef<'src, 'arena>> {
|
||||||
|
let start_position = self.expect(
|
||||||
|
Token::Keyword(Keyword::Replication),
|
||||||
|
ParseErrorKind::ReplicationMissingKeyword,
|
||||||
|
)?;
|
||||||
|
self.expect(Token::LeftBrace, ParseErrorKind::ReplicationMissingLBrace)?;
|
||||||
|
|
||||||
|
let mut rules = self.arena.vec();
|
||||||
|
while !matches!(self.peek_token(), Some(Token::RightBrace)) {
|
||||||
|
let loop_start = self
|
||||||
|
.peek_position()
|
||||||
|
.unwrap_or_else(|| self.last_consumed_position_or_start());
|
||||||
|
|
||||||
|
if self.peek_token().is_none() {
|
||||||
|
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.parse_replication_rule() {
|
||||||
|
Ok(rule) => rules.push(rule),
|
||||||
|
Err(error) => {
|
||||||
|
self.report_error(error);
|
||||||
|
self.recover_until(SyncLevel::Statement);
|
||||||
|
let _ = self.eat(Token::Semicolon);
|
||||||
|
if !self.ensure_progress_or_break(loop_start) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.ensure_progress_or_break(loop_start) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect(Token::RightBrace, ParseErrorKind::ReplicationMissingRBrace)?;
|
||||||
|
let span = AstSpan::range(start_position, self.last_consumed_position_or_start());
|
||||||
|
Ok(self
|
||||||
|
.arena
|
||||||
|
.alloc_node(ReplicationBlock { rules, span }, span))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_ignores_clause(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, Option<crate::arena::ArenaVec<'arena, IdentifierToken>>> {
|
||||||
|
if !self.eat_keyword(Keyword::Ignores) {
|
||||||
|
return Ok(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut identifiers = self.arena.vec();
|
||||||
|
loop {
|
||||||
|
let identifier = self.parse_identifier(ParseErrorKind::IgnoresItemNotIdentifier)?;
|
||||||
|
identifiers.push(identifier);
|
||||||
|
if !self.eat(Token::Comma) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect(Token::Semicolon, ParseErrorKind::IgnoresMissingSemicolon)?;
|
||||||
|
Ok(Some(identifiers))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_state_decl(&mut self) -> ParseResult<'src, 'arena, StateDeclRef<'src, 'arena>> {
|
||||||
|
let start_position = self.require_position(ParseErrorKind::UnexpectedEndOfFile)?;
|
||||||
|
|
||||||
|
let mut modifiers = self.arena.vec();
|
||||||
|
loop {
|
||||||
|
match self.peek_keyword() {
|
||||||
|
Some(Keyword::Auto) => {
|
||||||
|
self.advance();
|
||||||
|
modifiers.push(StateModifier::Auto);
|
||||||
|
}
|
||||||
|
Some(Keyword::Simulated) => {
|
||||||
|
self.advance();
|
||||||
|
modifiers.push(StateModifier::Simulated);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.expect(
|
||||||
|
Token::Keyword(Keyword::State),
|
||||||
|
ParseErrorKind::StateMissingKeyword,
|
||||||
|
)?;
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match self.peek_keyword() {
|
||||||
|
Some(Keyword::Auto) => {
|
||||||
|
self.advance();
|
||||||
|
modifiers.push(StateModifier::Auto);
|
||||||
|
}
|
||||||
|
Some(Keyword::Simulated) => {
|
||||||
|
self.advance();
|
||||||
|
modifiers.push(StateModifier::Simulated);
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.eat(Token::LeftParenthesis) {
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::StateParensMissingRParen,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
let name = self.parse_identifier(ParseErrorKind::StateNameNotIdentifier)?;
|
||||||
|
let parent = if self.eat_keyword(Keyword::Extends) {
|
||||||
|
Some(self.parse_identifier(ParseErrorKind::StateParentNameNotIdentifier)?)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let opening_brace_position =
|
||||||
|
self.expect(Token::LeftBrace, ParseErrorKind::StateMissingLBrace)?;
|
||||||
|
let ignores = self.parse_ignores_clause()?;
|
||||||
|
let BlockBody {
|
||||||
|
statements: body,
|
||||||
|
span: inner_span,
|
||||||
|
} = self.parse_braced_block_statements_tail(opening_brace_position);
|
||||||
|
|
||||||
|
let span = AstSpan::range(start_position, inner_span.token_to);
|
||||||
|
Ok(self.arena.alloc_node(
|
||||||
|
StateDecl {
|
||||||
|
name,
|
||||||
|
parent,
|
||||||
|
modifiers,
|
||||||
|
ignores,
|
||||||
|
body,
|
||||||
|
span,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the members of a class definition after the class header.
///
/// Loops over class-level items (states, callables, consts, enums, structs,
/// `var` declarations, replication blocks, exec directives, cpptext blocks),
/// pushing each into `members`. Stops at `defaultproperties`, a new `class`
/// keyword, or end of input. Unexpected items are reported and skipped up to
/// the next member-starting keyword.
pub(crate) fn parse_class_definition_cont(
    &mut self,
) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
    let header = self.parse_class_header_cont()?;
    let mut members = self.arena.vec();

    loop {
        let Some((token, member_start)) = self.peek_token_and_position() else {
            break;
        };

        match token {
            // `defaultproperties` is handled by the caller.
            Token::Keyword(Keyword::DefaultProperties) => break,

            // State declarations may start with modifiers, so this guard
            // must run before the plain keyword arms below.
            _ if self.lookahead_state_after_mods() => {
                let state = self
                    .parse_state_decl()
                    .widen_error_span_from(member_start)
                    .unwrap_or_fallback(self);
                let span = *state.span();
                members.push(self.arena.alloc_node(ClassMember::State(state), span));
            }

            _ if self.is_callable_header_ahead() => {
                let callable = self.parse_callable_definition();
                let span = *callable.span();
                members.push(self.arena.alloc_node(ClassMember::Function(callable), span));
            }

            Token::Keyword(Keyword::Const) => {
                let constant = self
                    .parse_class_const_decl()
                    .widen_error_span_from(member_start)
                    .unwrap_or_fallback(self);
                let span = *constant.span();
                members.push(self.arena.alloc_node(ClassMember::Const(constant), span));
            }

            // `enum` directly followed by `{` is not a type definition here;
            // only the named form `enum Name { ... }` is accepted.
            Token::Keyword(Keyword::Enum)
                if !matches!(self.peek_token_at(1), Some(Token::LeftBrace)) =>
            {
                self.advance();
                let enum_definition = self.parse_enum_definition_tail(member_start);
                let span = *enum_definition.span();
                members.push(
                    self.arena
                        .alloc_node(ClassMember::TypeDefEnum(enum_definition), span),
                );
                // Trailing `;` after the type definition is optional.
                let _ = self.eat(Token::Semicolon);
            }

            Token::Keyword(Keyword::Struct) => {
                self.advance();
                let struct_definition = self.parse_struct_definition_tail(member_start);
                let span = *struct_definition.span();
                members.push(
                    self.arena
                        .alloc_node(ClassMember::TypeDefStruct(struct_definition), span),
                );
                // Trailing `;` after the type definition is optional.
                let _ = self.eat(Token::Semicolon);
            }

            Token::Keyword(Keyword::Var) => {
                let variable_declaration = self
                    .parse_class_var_decl()
                    .widen_error_span_from(member_start)
                    .unwrap_or_fallback(self);
                let span = *variable_declaration.span();
                members.push(
                    self.arena
                        .alloc_node(ClassMember::Var(variable_declaration), span),
                );
            }

            Token::Keyword(Keyword::Replication) => {
                let replication = self
                    .parse_replication_block()
                    .widen_error_span_from(member_start)
                    .unwrap_or_fallback(self);
                let span = *replication.span();
                members.push(
                    self.arena
                        .alloc_node(ClassMember::Replication(replication), span),
                );
            }

            Token::ExecDirective => {
                let directive = self
                    .parse_exec_directive()
                    .widen_error_span_from(member_start)
                    .unwrap_or_fallback(self);
                let span = *directive.span();
                members.push(self.arena.alloc_node(ClassMember::Exec(directive), span));
            }

            // C++ blocks are skipped entirely; they are not kept in the AST.
            Token::Keyword(Keyword::CppText | Keyword::CppStruct) => {
                self.advance();
                if !self.eat(Token::CppBlock) {
                    self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
                }
            }

            // A second `class` keyword means a new definition begins.
            Token::Keyword(Keyword::Class) => break,

            // Stray semicolons between members are tolerated.
            Token::Semicolon => {
                self.advance();
            }

            // Unknown item: report once, then skip tokens until something
            // that can plausibly start the next member.
            _ => {
                self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
                while let Some(next_token) = self.peek_token() {
                    match next_token {
                        Token::Keyword(
                            Keyword::Function
                            | Keyword::Event
                            | Keyword::Enum
                            | Keyword::Struct
                            | Keyword::Var
                            | Keyword::Replication
                            | Keyword::State
                            | Keyword::Class
                            | Keyword::DefaultProperties,
                        ) => break,
                        _ => {
                            self.advance();
                        }
                    }
                }
            }
        }

        // Guard against infinite loops: bail out if no token was consumed
        // since `member_start`.
        if !self.ensure_progress_or_break(member_start) {
            break;
        }
    }

    Ok(ClassDefinition { header, members })
}
|
||||||
|
|
||||||
|
/// Parses a complete source file: optional leading semicolons and exec
/// directives, a single `class` definition, and trailing trivia.
///
/// Returns an error only when the `class` keyword or the class header itself
/// fails to parse; trailing garbage after the class is reported as a
/// diagnostic but does not fail the parse.
pub fn parse_source_file(
    &mut self,
) -> ParseResult<'src, 'arena, ClassDefinition<'src, 'arena>> {
    // Skip leading `;` and exec directives before the class keyword.
    loop {
        match self.peek_token() {
            Some(Token::Semicolon) => {
                self.advance();
            }
            Some(Token::ExecDirective) => {
                // NOTE(review): if `parse_exec_directive` can fail without
                // consuming the directive token, this loop has no forward
                // progress guard - confirm it always advances.
                if let Err(error) = self.parse_exec_directive() {
                    self.report_error(error);
                }
            }
            //Some(Token::Keyword(Keyword::Class)) | None => break,
            _ => break,
        }
    }

    self.expect(
        Token::Keyword(Keyword::Class),
        ParseErrorKind::ClassMissingKeyword,
    )?;
    let class_definition = self.parse_class_definition_cont()?;

    // A `defaultproperties` block follows the class; leave it for the caller.
    if matches!(
        self.peek_token(),
        Some(Token::Keyword(Keyword::DefaultProperties))
    ) {
        return Ok(class_definition);
    }

    // Consume trailing semicolons; anything else after the class body is an
    // error, after which the rest of the file is discarded.
    loop {
        match self.peek_token() {
            Some(Token::Semicolon) => {
                self.advance();
            }
            Some(_) => {
                self.report_error_here(ParseErrorKind::ClassUnexpectedItem);
                while self.peek_token().is_some() {
                    self.advance();
                }
                break;
            }
            None => break,
        }
    }

    Ok(class_definition)
}
|
||||||
|
|
||||||
|
fn decode_signed_integer_literal(&self, s: &str) -> ParseResult<'src, 'arena, i128> {
|
||||||
|
let (negative, body) = if let Some(rest) = s.strip_prefix('-') {
|
||||||
|
(true, rest)
|
||||||
|
} else if let Some(rest) = s.strip_prefix('+') {
|
||||||
|
(false, rest)
|
||||||
|
} else {
|
||||||
|
(false, s)
|
||||||
|
};
|
||||||
|
|
||||||
|
let magnitude: u128 = self.decode_unsigned_integer_magnitude(body)?;
|
||||||
|
|
||||||
|
if negative {
|
||||||
|
const MIN_MAGNITUDE: u128 = 1u128 << 127;
|
||||||
|
if magnitude == MIN_MAGNITUDE {
|
||||||
|
Ok(i128::MIN)
|
||||||
|
} else {
|
||||||
|
let magnitude_as_i128 = i128::try_from(magnitude)
|
||||||
|
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))?;
|
||||||
|
Ok(-magnitude_as_i128)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
i128::try_from(magnitude)
|
||||||
|
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn decode_unsigned_integer_magnitude(&self, body: &str) -> ParseResult<'src, 'arena, u128> {
|
||||||
|
use ParseErrorKind::InvalidNumericLiteral;
|
||||||
|
|
||||||
|
if body.is_empty() {
|
||||||
|
return Err(self.make_error_here(InvalidNumericLiteral));
|
||||||
|
}
|
||||||
|
|
||||||
|
let (base, digits) =
|
||||||
|
if let Some(rest) = body.strip_prefix("0x").or_else(|| body.strip_prefix("0X")) {
|
||||||
|
(16u128, rest)
|
||||||
|
} else if let Some(rest) = body.strip_prefix("0b").or_else(|| body.strip_prefix("0B")) {
|
||||||
|
(2u128, rest)
|
||||||
|
} else if let Some(rest) = body.strip_prefix("0o").or_else(|| body.strip_prefix("0O")) {
|
||||||
|
(8u128, rest)
|
||||||
|
} else {
|
||||||
|
(10u128, body)
|
||||||
|
};
|
||||||
|
|
||||||
|
if digits.is_empty() {
|
||||||
|
return Err(self.make_error_here(InvalidNumericLiteral));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut accumulator: u128 = 0;
|
||||||
|
for character in digits.chars() {
|
||||||
|
if character == '_' {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let digit_value = match character {
|
||||||
|
'0'..='9' => u128::from(character as u32 - '0' as u32),
|
||||||
|
'a'..='f' => u128::from(10 + (character as u32 - 'a' as u32)),
|
||||||
|
'A'..='F' => u128::from(10 + (character as u32 - 'A' as u32)),
|
||||||
|
_ => return Err(self.make_error_here(InvalidNumericLiteral)),
|
||||||
|
};
|
||||||
|
if digit_value >= base {
|
||||||
|
return Err(self.make_error_here(InvalidNumericLiteral));
|
||||||
|
}
|
||||||
|
accumulator = accumulator
|
||||||
|
.checked_mul(base)
|
||||||
|
.and_then(|value| value.checked_add(digit_value))
|
||||||
|
.ok_or_else(|| self.make_error_here(InvalidNumericLiteral))?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(accumulator)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a literal in declaration position (e.g. the right-hand side of a
/// `const` declaration): signed numbers, strings, booleans, `none`, name
/// literals, `class'Name'` tagged names, and bare identifiers.
///
/// Returns the decoded literal together with the position of its first
/// token.
fn parse_declaration_literal_class(
    &mut self,
) -> ParseResult<'src, 'arena, DeclarationLiteralRef<'src, 'arena>> {
    let (token, lexeme, token_position) =
        self.require_token_lexeme_and_position(ParseErrorKind::UnexpectedEndOfFile)?;

    let literal = match token {
        // An explicit sign must be followed by a numeric literal.
        Token::Plus | Token::Minus => {
            let is_negative = matches!(token, Token::Minus);
            self.advance();

            let (next_token, next_lexeme, _) =
                self.require_token_lexeme_and_position(ParseErrorKind::InvalidNumericLiteral)?;

            match next_token {
                Token::IntegerLiteral => {
                    // Re-attach the sign to the lexeme so the decoder sees
                    // one signed literal.
                    let value = if is_negative {
                        self.decode_signed_integer_literal(&format!("-{next_lexeme}"))?
                    } else {
                        self.decode_signed_integer_literal(next_lexeme)?
                    };
                    self.advance();
                    DeclarationLiteral::Integer(value)
                }
                Token::FloatLiteral => {
                    // Same idea for floats, built without an intermediate
                    // `format!`.
                    let mut signed_lexeme = String::with_capacity(1 + next_lexeme.len());
                    signed_lexeme.push(if is_negative { '-' } else { '+' });
                    signed_lexeme.push_str(next_lexeme);
                    let value = self.decode_float_literal(&signed_lexeme)?;
                    self.advance();
                    DeclarationLiteral::Float(value)
                }
                _ => {
                    return Err(
                        self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)
                    );
                }
            }
        }
        Token::IntegerLiteral => {
            let value = self.decode_signed_integer_literal(lexeme)?;
            self.advance();
            DeclarationLiteral::Integer(value)
        }
        Token::FloatLiteral => {
            let value = self.decode_float_literal(lexeme)?;
            self.advance();
            DeclarationLiteral::Float(value)
        }
        Token::StringLiteral => {
            let value = self.unescape_string_literal(lexeme);
            self.advance();
            DeclarationLiteral::String(value)
        }
        Token::Keyword(Keyword::True) => {
            self.advance();
            DeclarationLiteral::Bool(true)
        }
        Token::Keyword(Keyword::False) => {
            self.advance();
            DeclarationLiteral::Bool(false)
        }
        Token::Keyword(Keyword::None) => {
            self.advance();
            DeclarationLiteral::None
        }
        Token::NameLiteral => {
            // Strip the surrounding quote characters of the name literal.
            let inner = &lexeme[1..lexeme.len() - 1];
            let value = self.arena.string(inner);
            self.advance();
            DeclarationLiteral::String(value)
        }
        // `class'Name'` - a tagged name literal.
        Token::Keyword(Keyword::Class) => {
            self.advance();
            let (next_token, next_lexeme, _) = self.require_token_lexeme_and_position(
                ParseErrorKind::DeclarationLiteralUnexpectedToken,
            )?;
            if !matches!(next_token, Token::NameLiteral) {
                return Err(
                    self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken)
                );
            }
            // Strip the surrounding quote characters of the name literal.
            let inner = &next_lexeme[1..next_lexeme.len() - 1];
            let quoted_name = self.arena.string(inner);
            self.advance();
            DeclarationLiteral::TaggedName {
                tag: IdentifierToken(token_position),
                quoted: quoted_name,
            }
        }
        _ if token.is_valid_identifier_name() => {
            self.advance();
            DeclarationLiteral::Identifier(lexeme)
        }
        _ => return Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken)),
    };

    Ok(DeclarationLiteralRef {
        literal,
        position: token_position,
    })
}
|
||||||
|
|
||||||
|
/// Parses a class-level constant: `const NAME = literal;`, starting at the
/// `const` keyword.
///
/// Returns an error if the keyword, name, `=`, literal, or terminating `;`
/// is missing.
fn parse_class_const_decl(
    &mut self,
) -> ParseResult<'src, 'arena, ClassConstDeclRef<'src, 'arena>> {
    let start_position = self.expect(
        Token::Keyword(Keyword::Const),
        ParseErrorKind::ClassUnexpectedItem,
    )?;

    let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
    // NOTE(review): `TypeSpecInvalidNamedTypeName` looks like the wrong
    // diagnostic for a missing `=` in a const declaration - confirm the
    // intended error kind.
    self.expect(Token::Assign, ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
    let value = self.parse_declaration_literal_class()?;

    self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?;
    let span = AstSpan::range(start_position, self.last_consumed_position_or_start());

    Ok(self
        .arena
        .alloc_node(ClassConstDecl { name, value, span }, span))
}
|
||||||
|
|
||||||
|
/// Parses the comma-separated declarator list (`name` with an optional
/// array-length suffix) that follows a `var` declaration's type.
///
/// Never fails: diagnostics are reported and recovered from in place, and
/// the declarators parsed so far (possibly none) are returned.
fn parse_class_var_declarators(
    &mut self,
) -> crate::arena::ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
    let mut declarators = self.arena.vec();

    loop {
        match self.peek_token_and_position() {
            Some((next_token, declarator_start)) if next_token.is_valid_identifier_name() => {
                // Fall back to a placeholder identifier at the declarator's
                // start position if parsing the name still fails.
                let identifier = self
                    .parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)
                    .unwrap_or(IdentifierToken(declarator_start));

                // Optional array-length suffix; on error, recover to the
                // closing bracket and drop the size.
                let array_size = match self.parse_array_len_expr() {
                    Ok(value) => value,
                    Err(error) => {
                        self.report_error(error);
                        self.recover_until(SyncLevel::CloseBracket);
                        let _ = self.eat(Token::RightBracket);
                        None
                    }
                };

                let span = AstSpan::range(identifier.0, self.last_consumed_position_or_start());
                declarators.push(self.arena.alloc_node(
                    VariableDeclarator {
                        name: identifier,
                        // `var` declarations cannot carry initializers.
                        initializer: None,
                        array_size,
                    },
                    span,
                ));

                if self.eat(Token::Comma) {
                    // Tolerate a trailing comma right before the `;`.
                    if self.peek_token() == Some(Token::Semicolon) {
                        break;
                    }
                    continue;
                }
                break;
            }
            // First declarator is not an identifier at all: report, skip to
            // the end of the statement, and give up on this declaration.
            Some((_, _)) if declarators.is_empty() => {
                self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
                self.recover_until(SyncLevel::Statement);
                let _ = self.eat(Token::Semicolon);
                break;
            }
            _ => break,
        }
    }

    declarators
}
|
||||||
|
|
||||||
|
/// Parses a comma-separated identifier list terminated by `)` (the
/// parentheses themselves are handled by the caller).
///
/// Reports diagnostics for leading/doubled separators, missing separators,
/// invalid identifiers, and an entirely empty list; always returns the
/// identifiers that were parsed successfully.
pub(crate) fn parse_identifier_list(
    &mut self,
) -> crate::arena::ArenaVec<'arena, IdentifierToken> {
    // Anchor for the "empty list" diagnostic below.
    let list_start = self.last_consumed_position_or_start();
    let mut identifiers = self.arena.vec();

    while let Some((token, _lexeme, identifier_position)) =
        self.peek_token_lexeme_and_position()
    {
        match token {
            Token::RightParenthesis => break,
            // A comma where an identifier should be: consume it and report.
            Token::Comma => {
                self.advance();
                self.report_error_here(ParseErrorKind::ListMissingIdentifierBeforeSeparator);
            }
            _ if token.is_valid_identifier_name() => {
                self.advance();
                identifiers.push(IdentifierToken(identifier_position));
                // Two identifiers in a row (no `,`, no `)`) is a missing
                // separator; keep parsing after reporting.
                if !self.eat(Token::Comma)
                    && let Some(next_token) = self.peek_token()
                    && next_token != Token::RightParenthesis
                {
                    self.report_error_here(ParseErrorKind::ListMissingSeparator);
                }
            }
            // Anything else: report and skip to the next list separator.
            _ => {
                self.make_error_here(ParseErrorKind::ListInvalidIdentifier)
                    .sync_error_until(self, SyncLevel::ListSeparator)
                    .report_error(self);
            }
        }
    }

    if identifiers.is_empty() {
        let list_end = self.last_consumed_position_or_start();
        self.report_error(crate::parser::ParseError {
            kind: ParseErrorKind::ListEmpty,
            anchor: list_start,
            blame_span: AstSpan::range(list_start, list_end),
            covered_span: AstSpan::range(list_start, list_end),
            related_span: None,
        });
    }

    identifiers
}
|
||||||
|
}
|
||||||
@ -1,180 +0,0 @@
|
|||||||
use crate::ast::{AstSpan, Expression, ExpressionRef};
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
|
||||||
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an `if` block, assuming that `if` token was consumed.
    ///
    /// Produces an [`Expression::If`] spanning from the `if` keyword to
    /// the end of the last arm (`else` body if present,
    /// otherwise the `if` body).
    #[must_use]
    pub(crate) fn parse_if_cont(
        &mut self,
        if_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();

        let (else_body, if_end_location) = if let Some(Token::Else) = self.peek_token() {
            self.advance(); // else
            let else_body = self.parse_expression();
            // Capture end before moving `else_body` to build the full `if` span
            let body_end = else_body.span().to;
            (Some(else_body), body_end)
        } else {
            (None, body.span().to)
        };

        let span = AstSpan {
            from: if_start_location,
            to: if_end_location,
        };
        self.arena.alloc(
            Expression::If {
                condition,
                body,
                else_body,
            },
            span,
        )
    }

    /// Parses a `while` loop, assuming that `while` token was consumed.
    ///
    /// Produces an [`Expression::While`] spanning from the `while` keyword
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_while_cont(
        &mut self,
        while_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let condition = self.parse_expression();
        let body = self.parse_expression();
        let span = AstSpan {
            from: while_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::While { condition, body }, span)
    }

    /// Parses a `do ... until ...` loop after `do`, assuming that `do` token
    /// was consumed.
    ///
    /// On a missing `until`, returns an error
    /// [`ParseErrorKind::DoMissingUntil`].
    /// On success, produces an [`Expression::DoUntil`] spanning from `do`
    /// to the end of the condition.
    #[must_use]
    pub(crate) fn parse_do_until_cont(
        &mut self,
        do_start_location: TokenLocation,
    ) -> crate::parser::ParseExpressionResult<'src, 'arena> {
        let body = self.parse_expression();

        self.expect(Token::Until, ParseErrorKind::DoMissingUntil)
            .widen_error_span_from(do_start_location)?;
        let condition = self.parse_expression();
        let span = AstSpan {
            from: do_start_location,
            to: condition.span().to,
        };
        Ok(self
            .arena
            .alloc(Expression::DoUntil { condition, body }, span))
    }

    /// Parses a `foreach` loop, assuming that `foreach` token was consumed.
    ///
    /// Produces an [`Expression::ForEach`] spanning from `foreach`
    /// to the end of the body.
    #[must_use]
    pub(crate) fn parse_foreach_cont(
        &mut self,
        foreach_start_location: TokenLocation,
    ) -> ExpressionRef<'src, 'arena> {
        let iterator = self.parse_expression();

        let body = self.parse_expression();
        let span = AstSpan {
            from: foreach_start_location,
            to: body.span().to,
        };
        self.arena
            .alloc(Expression::ForEach { iterator, body }, span)
    }

    /// Parses a `for` loop after `for`, assuming that `for` token was consumed.
    ///
    /// Grammar: `for (init?; condition?; step?) body`.
    /// Any of `init`, `condition`, or `step` may be omitted.
    /// Emits specific `ParseErrorKind` values for missing
    /// delimiters/separators.
    /// On success returns an [`Expression::For`] spanning from `for` to
    /// the end of the body.
    #[must_use]
    pub(crate) fn parse_for_cont(
        &mut self,
        for_start_location: TokenLocation,
    ) -> crate::parser::ParseResult<'src, 'arena, ExpressionRef<'src, 'arena>> {
        self.expect(
            Token::LeftParenthesis,
            ParseErrorKind::ForMissingOpeningParenthesis,
        )
        .widen_error_span_from(for_start_location)?;

        // An immediate `;` means the clause was omitted.
        let init = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let init = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingInitializationSemicolon,
            )?;
            Some(init)
        };

        let condition = if let Some(Token::Semicolon) = self.peek_token() {
            self.advance();
            None
        } else {
            let condition = self.parse_expression();
            self.expect(
                Token::Semicolon,
                ParseErrorKind::ForMissingConditionSemicolon,
            )?;
            Some(condition)
        };

        // The step clause ends at `)` rather than `;`.
        let step = if let Some(Token::RightParenthesis) = self.peek_token() {
            self.advance();
            None
        } else {
            let step = self.parse_expression();
            self.expect(
                Token::RightParenthesis,
                ParseErrorKind::ForMissingClosingParenthesis,
            )
            .widen_error_span_from(for_start_location)
            .sync_error_until(self, crate::parser::SyncLevel::CloseParenthesis)?;
            Some(step)
        };

        let body = self.parse_expression();
        let span = AstSpan {
            from: for_start_location,
            to: body.span().to,
        };
        Ok(self.arena.alloc(
            Expression::For {
                init,
                condition,
                step,
                body,
            },
            span,
        ))
    }
}
|
|
||||||
138
rottlib/src/parser/grammar/declarations/enum_definition.rs
Normal file
138
rottlib/src/parser/grammar/declarations/enum_definition.rs
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
//! Parsing of enum definitions for Fermented `UnrealScript`.
|
||||||
|
|
||||||
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{AstSpan, EnumDefRef, EnumDefinition, IdentifierToken};
|
||||||
|
use crate::lexer::Token;
|
||||||
|
use crate::lexer::TokenPosition;
|
||||||
|
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
/// What the enum-variant list parser expects to see next.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum EnumParseState {
    /// A variant identifier should come next.
    ExpectingVariant,
    /// A `,` separator (or the closing brace) should come next.
    ExpectingSeparator,
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses an `enum` definition after the `enum` keyword has been consumed.
    ///
    /// Returns a reference to the allocated enum definition containing its name
    /// and variants.
    pub(crate) fn parse_enum_definition_tail(
        &mut self,
        enum_keyword_position: TokenPosition,
    ) -> EnumDefRef<'src, 'arena> {
        // Missing name or braces are reported but never abort the parse.
        let name = self
            .parse_identifier(ParseErrorKind::EnumExpectedNameOrBrace)
            .unwrap_or_fallback(self);
        self.expect(Token::LeftBrace, ParseErrorKind::EnumMissingLeftBrace)
            .report_error(self);
        let variants = self.parse_enum_variants();
        self.expect(Token::RightBrace, ParseErrorKind::EnumNoClosingBrace)
            .report_error(self);

        let span = AstSpan::range(
            enum_keyword_position,
            self.last_consumed_position_or_start(),
        );
        self.arena
            .alloc_node(EnumDefinition { name, variants }, span)
    }

    /// Parses the list of enum variants inside braces, handling commas and
    /// errors.
    ///
    /// Returns a vector of successfully parsed variant identifiers.
    fn parse_enum_variants(&mut self) -> ArenaVec<'arena, IdentifierToken> {
        use EnumParseState::{ExpectingSeparator, ExpectingVariant};

        // Small two-state machine: alternate between expecting a variant
        // and expecting a separator, recovering on each mismatch.
        let mut variants = self.arena.vec();
        let mut parser_state = ExpectingVariant;
        while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
            let should_break = match (parser_state, next_token) {
                (_, Token::RightBrace) => break,
                (ExpectingVariant, Token::Comma) => self
                    .recover_from_empty_enum_variant(next_token_position)
                    .is_break(),
                (ExpectingVariant, _) => {
                    parser_state = ExpectingSeparator;
                    self.parse_and_push_enum_variant(&mut variants).is_break()
                }
                (ExpectingSeparator, Token::Comma) => {
                    self.advance(); // `,`
                    parser_state = ExpectingVariant;
                    false
                }
                (ExpectingSeparator, _) => self
                    .parse_enum_variant_after_missing_separator(next_token_position, &mut variants)
                    .is_break(),
            };
            if should_break {
                break;
            }
            // Guard against a recovery path that consumed nothing.
            self.ensure_forward_progress(next_token_position);
        }
        variants
    }

    /// Recovers from one or more commas appearing where a variant is expected.
    ///
    /// Stops parsing if only a closing brace or end-of-file remains.
    fn recover_from_empty_enum_variant(
        &mut self,
        error_start_position: TokenPosition,
    ) -> ControlFlow<()> {
        // Collapse a run of consecutive commas into one diagnostic.
        while self.peek_token() == Some(Token::Comma) {
            self.advance();
        }
        self.make_error_here(ParseErrorKind::EnumEmptyVariants)
            .widen_error_span_from(error_start_position)
            .report_error(self);
        if matches!(self.peek_token(), Some(Token::RightBrace) | None) {
            ControlFlow::Break(())
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Parses one enum variant and appends it to `variants`.
    ///
    /// Stops parsing if recovery does not produce a valid identifier.
    fn parse_and_push_enum_variant(
        &mut self,
        variants: &mut ArenaVec<'arena, IdentifierToken>,
    ) -> ControlFlow<()> {
        self.parse_identifier(ParseErrorKind::EnumBadVariant)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
            .map_or(ControlFlow::Break(()), |variant| {
                variants.push(variant);
                ControlFlow::Continue(())
            })
    }

    /// Parses a variant after a missing separator and reports the missing-comma
    /// diagnostic if recovery succeeds.
    fn parse_enum_variant_after_missing_separator(
        &mut self,
        error_start_position: TokenPosition,
        variants: &mut ArenaVec<'arena, IdentifierToken>,
    ) -> ControlFlow<()> {
        let Some(variant) = self
            .parse_identifier(ParseErrorKind::EnumBadVariant)
            .widen_error_span_from(error_start_position)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        else {
            // If we don't even get a good identifier - error is different
            return ControlFlow::Break(());
        };
        self.make_error_here(ParseErrorKind::EnumNoSeparatorBetweenVariants)
            .widen_error_span_from(error_start_position)
            .report_error(self);

        variants.push(variant);
        ControlFlow::Continue(())
    }
}
|
||||||
11
rottlib/src/parser/grammar/declarations/mod.rs
Normal file
11
rottlib/src/parser/grammar/declarations/mod.rs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
//! Declaration parsing for Fermented `UnrealScript`.
//!
//! Implements recursive-descent parsing for declaration-related grammar:
//! type specifiers, enum and struct definitions, `var(...)` prefixes,
//! and variable declarators.

mod enum_definition; // `enum Name { ... }` definitions.
mod struct_definition; // `struct Name { ... }` definitions.
mod type_specifier; // Type-specifier parsing (variable types).
mod var_specifiers; // `var(...)` editor specifiers and declaration-modifiers.
mod variable_declarators; // Comma-separated declarator lists (variable lists).
|
||||||
210
rottlib/src/parser/grammar/declarations/struct_definition.rs
Normal file
210
rottlib/src/parser/grammar/declarations/struct_definition.rs
Normal file
@ -0,0 +1,210 @@
|
|||||||
|
//! Parsing of struct definitions for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! ## C++ block handling
|
||||||
|
//!
|
||||||
|
//! The Fermented `UnrealScript` parser must support parsing several legacy
|
||||||
|
//! source files that contain `cpptext` or `cppstruct`. Our compiler does not
|
||||||
|
//! compile with C++ code and therefore does not need these blocks in
|
||||||
|
//! the resulting AST. We treat them the same as trivia and skip them.
|
||||||
|
//!
|
||||||
|
//! However, some related tokens are context-sensitive, so handling these
|
||||||
|
//! blocks in the general trivia-skipping path would complicate the separation
|
||||||
|
//! between the lexer and the parser.
|
||||||
|
//!
|
||||||
|
//! The resulting files will not be compiled, but they can still be used to
|
||||||
|
//! extract type information.
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{
|
||||||
|
AstSpan, IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField,
|
||||||
|
StructFieldRef, StructModifier, StructModifierKind, TypeSpecifierRef, VarEditorSpecifierRef,
|
||||||
|
VarModifier,
|
||||||
|
};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
/// Everything that precedes the declarator list in a struct field:
/// `var(...)` editor specifiers, declaration modifiers, and the field type.
#[derive(Debug)]
struct ParsedStructFieldPrefix<'src, 'arena> {
    /// Editor specifiers from a `var(...)` group, if one was present.
    editor_specifiers: Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>>,
    /// Declaration modifiers that appeared before the type.
    declaration_modifiers: ArenaVec<'arena, VarModifier>,
    /// The field's type specifier.
    type_specifier: TypeSpecifierRef<'src, 'arena>,
}
|
||||||
|
|
||||||
|
/// Result of attempting to parse a single item inside a struct body.
#[derive(Debug)]
enum StructBodyItemParseOutcome<'src, 'arena> {
    /// A struct field was parsed successfully.
    Field(StructFieldRef<'src, 'arena>),
    /// The item was skipped (unsupported or recovered-from); keep parsing.
    Skip,
    /// Parsing of the struct body should stop at this level.
    Stop,
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses a `struct` definition after the `struct` keyword has been
/// consumed.
///
/// Never fails: missing closing braces and bad body items are reported as
/// diagnostics, and the definition assembled so far is returned.
pub(crate) fn parse_struct_definition_tail(
    &mut self,
    struct_keyword_position: TokenPosition,
) -> StructDefRef<'src, 'arena> {
    let modifiers = self.parse_struct_declaration_modifiers();
    let (name, base_type_name) = self.parse_struct_name_base_and_open_brace();

    let mut fields = self.arena.vec();
    while let Some((next_token, next_position)) = self.peek_token_and_position()
        && next_token != Token::RightBrace
    {
        match self.parse_or_skip_struct_body_item() {
            StructBodyItemParseOutcome::Field(new_field) => fields.push(new_field),
            StructBodyItemParseOutcome::Skip => (),
            StructBodyItemParseOutcome::Stop => break,
        }
        // Guard against a body item that consumed no tokens.
        self.ensure_forward_progress(next_position);
    }
    self.expect(Token::RightBrace, ParseErrorKind::StructMissingRightBrace)
        .widen_error_span_from(struct_keyword_position)
        .report_error(self);
    let span = AstSpan::range(
        struct_keyword_position,
        self.last_consumed_position_or_start(),
    );
    self.arena.alloc_node(
        StructDefinition {
            name,
            base_type_name,
            modifiers,
            fields,
        },
        span,
    )
}
|
||||||
|
|
||||||
|
/// Parses one item in a struct body or skips an unsupported one.
|
||||||
|
///
|
||||||
|
/// Returns [`StructBodyItemParseOutcome::Field`] for a successfully parsed
|
||||||
|
/// field, [`StructBodyItemParseOutcome::Skip`] when recovery allows parsing
|
||||||
|
/// to continue, and [`StructBodyItemParseOutcome::Stop`] when parsing
|
||||||
|
/// should stop at this level.
|
||||||
|
fn parse_or_skip_struct_body_item(&mut self) -> StructBodyItemParseOutcome<'src, 'arena> {
|
||||||
|
let Some((token, token_position)) = self.peek_token_and_position() else {
|
||||||
|
// This is the end of the file;
|
||||||
|
// it will be handled by a higher-level parser.
|
||||||
|
return StructBodyItemParseOutcome::Stop;
|
||||||
|
};
|
||||||
|
match token {
|
||||||
|
Token::Keyword(Keyword::CppText | Keyword::CppStruct) => {
|
||||||
|
self.advance();
|
||||||
|
if !self.eat(Token::CppBlock) {
|
||||||
|
self.report_error_here(ParseErrorKind::CppDirectiveMissingCppBlock);
|
||||||
|
self.recover_until(SyncLevel::Statement);
|
||||||
|
}
|
||||||
|
StructBodyItemParseOutcome::Skip
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Var) => {
|
||||||
|
self.advance();
|
||||||
|
self.parse_struct_field_tail(token_position)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
self.report_error_here(ParseErrorKind::StructBodyUnexpectedItem);
|
||||||
|
self.recover_until(SyncLevel::BlockBoundary);
|
||||||
|
StructBodyItemParseOutcome::Skip
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a struct field after the `var` keyword has been consumed.
|
||||||
|
///
|
||||||
|
/// Returns [`StructBodyItemParseOutcome::Skip`] if the field cannot be
|
||||||
|
/// parsed far enough to produce a usable AST node after recovery.
|
||||||
|
fn parse_struct_field_tail(
|
||||||
|
&mut self,
|
||||||
|
var_keyword_position: TokenPosition,
|
||||||
|
) -> StructBodyItemParseOutcome<'src, 'arena> {
|
||||||
|
let Some(field_prefix) = self.parse_struct_field_prefix() else {
|
||||||
|
return StructBodyItemParseOutcome::Skip;
|
||||||
|
};
|
||||||
|
let declarators = self.parse_variable_declarators();
|
||||||
|
if !self.eat(Token::Semicolon) {
|
||||||
|
self.report_error_here(ParseErrorKind::StructFieldMissingSemicolon);
|
||||||
|
self.recover_until(SyncLevel::BlockBoundary);
|
||||||
|
let _ = self.eat(Token::Semicolon);
|
||||||
|
}
|
||||||
|
if declarators.is_empty() {
|
||||||
|
return StructBodyItemParseOutcome::Skip;
|
||||||
|
}
|
||||||
|
let span = AstSpan::range(var_keyword_position, self.last_consumed_position_or_start());
|
||||||
|
StructBodyItemParseOutcome::Field(self.arena.alloc_node(
|
||||||
|
StructField {
|
||||||
|
type_specifier: field_prefix.type_specifier,
|
||||||
|
declaration_modifiers: field_prefix.declaration_modifiers,
|
||||||
|
editor_specifiers: field_prefix.editor_specifiers,
|
||||||
|
declarators,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_struct_field_prefix(&mut self) -> Option<ParsedStructFieldPrefix<'src, 'arena>> {
|
||||||
|
let editor_specifiers = self.parse_var_editor_specifier_list();
|
||||||
|
let declaration_modifiers = self.parse_var_declaration_modifiers();
|
||||||
|
let type_specification = self
|
||||||
|
.parse_type_specifier()
|
||||||
|
.sync_error_until(self, SyncLevel::BlockBoundary)
|
||||||
|
.ok_or_report(self)?;
|
||||||
|
Some(ParsedStructFieldPrefix {
|
||||||
|
editor_specifiers,
|
||||||
|
declaration_modifiers,
|
||||||
|
type_specifier: type_specification,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the struct name, optional base type, and opening brace.
|
||||||
|
///
|
||||||
|
/// Accepts anonymous structs that begin immediately with `{`.
|
||||||
|
fn parse_struct_name_base_and_open_brace(
|
||||||
|
&mut self,
|
||||||
|
) -> (
|
||||||
|
Option<IdentifierToken>,
|
||||||
|
Option<QualifiedIdentifierRef<'arena>>,
|
||||||
|
) {
|
||||||
|
if self.eat(Token::LeftBrace) {
|
||||||
|
return (None, None);
|
||||||
|
}
|
||||||
|
let name = self
|
||||||
|
.parse_identifier(ParseErrorKind::StructExpectedNameOrBrace)
|
||||||
|
.ok_or_report(self);
|
||||||
|
let base_type_name =
|
||||||
|
if let Some((Token::Keyword(Keyword::Extends), extends_keyword_position)) =
|
||||||
|
self.peek_token_and_position()
|
||||||
|
{
|
||||||
|
self.advance();
|
||||||
|
self.parse_qualified_identifier(ParseErrorKind::StructExpectedBaseName)
|
||||||
|
.widen_error_span_from(extends_keyword_position)
|
||||||
|
.ok_or_report(self)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
self.expect(Token::LeftBrace, ParseErrorKind::StructMissingLeftBrace)
|
||||||
|
.report_error(self);
|
||||||
|
(name, base_type_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_struct_declaration_modifiers(&mut self) -> ArenaVec<'arena, StructModifier> {
|
||||||
|
let mut modifiers = self.arena.vec();
|
||||||
|
while let Some((next_keyword, next_keyword_position)) = self.peek_keyword_and_position() {
|
||||||
|
let next_modifier_kind = match next_keyword {
|
||||||
|
Keyword::Native => StructModifierKind::Native,
|
||||||
|
Keyword::Init => StructModifierKind::Init,
|
||||||
|
Keyword::Export => StructModifierKind::Export,
|
||||||
|
Keyword::NoExport => StructModifierKind::NoExport,
|
||||||
|
Keyword::Transient => StructModifierKind::Transient,
|
||||||
|
Keyword::Deprecated => StructModifierKind::Deprecated,
|
||||||
|
Keyword::Long => StructModifierKind::Long,
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
|
modifiers.push(StructModifier {
|
||||||
|
kind: next_modifier_kind,
|
||||||
|
position: next_keyword_position,
|
||||||
|
});
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
modifiers
|
||||||
|
}
|
||||||
|
}
|
||||||
116
rottlib/src/parser/grammar/declarations/type_specifier.rs
Normal file
116
rottlib/src/parser/grammar/declarations/type_specifier.rs
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
//! Parsing of type specifiers for Fermented `UnrealScript`.
|
||||||
|
|
||||||
|
use crate::ast::{AstSpan, TypeSpecifier, TypeSpecifierRef};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult, Parser};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses a type specifier used in variable declarations.
|
||||||
|
///
|
||||||
|
/// Accepts named types, `class<...>` types, `array<...>` types, and inline
|
||||||
|
/// `enum` and `struct` definitions.
|
||||||
|
///
|
||||||
|
/// Returns an error if the next tokens do not form a valid type specifier.
|
||||||
|
pub(crate) fn parse_type_specifier(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
|
||||||
|
let (starting_token, starting_token_position) =
|
||||||
|
self.require_token_and_position(ParseErrorKind::TypeSpecExpectedType)?;
|
||||||
|
|
||||||
|
match starting_token {
|
||||||
|
Token::Keyword(Keyword::Enum) => {
|
||||||
|
self.advance();
|
||||||
|
Ok(self.parse_inline_enum_tail(starting_token_position))
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Struct) => {
|
||||||
|
self.advance();
|
||||||
|
Ok(self.parse_inline_struct_tail(starting_token_position))
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Array) => {
|
||||||
|
self.advance();
|
||||||
|
self.parse_array_type_specification_tail(starting_token_position)
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Class) => {
|
||||||
|
self.advance();
|
||||||
|
self.parse_class_type_specification_tail(starting_token_position)
|
||||||
|
}
|
||||||
|
_ if starting_token.is_valid_type_name() => {
|
||||||
|
let type_name =
|
||||||
|
self.parse_qualified_identifier(ParseErrorKind::TypeSpecInvalidNamedTypeName)?;
|
||||||
|
let full_span = *type_name.span();
|
||||||
|
Ok(self
|
||||||
|
.arena
|
||||||
|
.alloc_node(TypeSpecifier::Named(type_name), full_span))
|
||||||
|
}
|
||||||
|
_ => Err(self.make_error_here(ParseErrorKind::TypeSpecExpectedType)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_inline_enum_tail(
|
||||||
|
&mut self,
|
||||||
|
starting_token_position: TokenPosition,
|
||||||
|
) -> TypeSpecifierRef<'src, 'arena> {
|
||||||
|
let enum_definition = self.parse_enum_definition_tail(starting_token_position);
|
||||||
|
let enum_span = AstSpan::range(starting_token_position, enum_definition.span().token_to);
|
||||||
|
self.arena
|
||||||
|
.alloc_node(TypeSpecifier::InlineEnum(enum_definition), enum_span)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_inline_struct_tail(
|
||||||
|
&mut self,
|
||||||
|
starting_token_position: TokenPosition,
|
||||||
|
) -> TypeSpecifierRef<'src, 'arena> {
|
||||||
|
let struct_definition = self.parse_struct_definition_tail(starting_token_position);
|
||||||
|
let struct_span =
|
||||||
|
AstSpan::range(starting_token_position, struct_definition.span().token_to);
|
||||||
|
self.arena
|
||||||
|
.alloc_node(TypeSpecifier::InlineStruct(struct_definition), struct_span)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_array_type_specification_tail(
|
||||||
|
&mut self,
|
||||||
|
starting_token_position: TokenPosition,
|
||||||
|
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
|
||||||
|
self.expect(
|
||||||
|
Token::Less,
|
||||||
|
ParseErrorKind::TypeSpecArrayMissingOpeningAngle,
|
||||||
|
)?;
|
||||||
|
let element_modifiers = self.parse_var_declaration_modifiers();
|
||||||
|
let element_type = self.parse_type_specifier()?;
|
||||||
|
let closing_angle_bracket_position = self.expect(
|
||||||
|
Token::Greater,
|
||||||
|
ParseErrorKind::TypeSpecArrayMissingClosingAngle,
|
||||||
|
)?;
|
||||||
|
let array_span = AstSpan::range(starting_token_position, closing_angle_bracket_position);
|
||||||
|
|
||||||
|
Ok(self.arena.alloc_node(
|
||||||
|
TypeSpecifier::Array {
|
||||||
|
element_type,
|
||||||
|
element_modifiers,
|
||||||
|
},
|
||||||
|
array_span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_class_type_specification_tail(
|
||||||
|
&mut self,
|
||||||
|
starting_token_position: TokenPosition,
|
||||||
|
) -> ParseResult<'src, 'arena, TypeSpecifierRef<'src, 'arena>> {
|
||||||
|
let (inner_type_name, class_type_end) = if self.eat(Token::Less) {
|
||||||
|
let inner_type_name = Some(
|
||||||
|
self.parse_qualified_identifier(ParseErrorKind::TypeSpecClassMissingInnerType)?,
|
||||||
|
);
|
||||||
|
let class_type_end = self.expect(
|
||||||
|
Token::Greater,
|
||||||
|
ParseErrorKind::TypeSpecClassMissingClosingAngle,
|
||||||
|
)?;
|
||||||
|
(inner_type_name, class_type_end)
|
||||||
|
} else {
|
||||||
|
(None, starting_token_position)
|
||||||
|
};
|
||||||
|
let span = AstSpan::range(starting_token_position, class_type_end);
|
||||||
|
Ok(self
|
||||||
|
.arena
|
||||||
|
.alloc_node(TypeSpecifier::Class(inner_type_name), span))
|
||||||
|
}
|
||||||
|
}
|
||||||
89
rottlib/src/parser/grammar/declarations/var_specifiers.rs
Normal file
89
rottlib/src/parser/grammar/declarations/var_specifiers.rs
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
//! Parsing of declaration specifiers used in `var(...) ...` syntax for
|
||||||
|
//! Fermented `UnrealScript`.
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{VarEditorSpecifier, VarEditorSpecifierRef, VarModifier};
|
||||||
|
use crate::lexer::Token;
|
||||||
|
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses a consecutive run of variable declaration modifiers.
|
||||||
|
///
|
||||||
|
/// This is used for declarations such as
|
||||||
|
/// `var transient config editconst int X;`.
|
||||||
|
///
|
||||||
|
/// Parsing stops when the next token is not a recognized [`VarModifier`].
|
||||||
|
/// That token is left unconsumed for the caller.
|
||||||
|
///
|
||||||
|
/// Returns the parsed modifiers in source order, or an empty vector if the
|
||||||
|
/// current token does not begin a modifier list.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_var_declaration_modifiers(&mut self) -> ArenaVec<'arena, VarModifier> {
|
||||||
|
let mut modifiers = self.arena.vec();
|
||||||
|
while let Some(current_token_and_position) = self.peek_token_and_position() {
|
||||||
|
let Ok(parsed_modifier) = VarModifier::try_from(current_token_and_position) else {
|
||||||
|
break;
|
||||||
|
};
|
||||||
|
self.advance();
|
||||||
|
modifiers.push(parsed_modifier);
|
||||||
|
}
|
||||||
|
modifiers
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the optional parenthesized editor specifier list in `var(...)`.
|
||||||
|
///
|
||||||
|
/// Assumes that `var` has already been consumed.
|
||||||
|
///
|
||||||
|
/// Returns `None` if the current token is not `(`. Returns `Some(...)` once
|
||||||
|
/// `(` is present, including for an empty list.
|
||||||
|
///
|
||||||
|
/// Recovery is intentionally minimal because these specifier lists are not
|
||||||
|
/// important enough to justify aggressive repair.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_var_editor_specifier_list(
|
||||||
|
&mut self,
|
||||||
|
) -> Option<ArenaVec<'arena, VarEditorSpecifierRef<'src, 'arena>>> {
|
||||||
|
if !self.eat(Token::LeftParenthesis) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut editor_specifiers = self.arena.vec();
|
||||||
|
while let Some((next_token, next_token_lexeme, next_token_position)) =
|
||||||
|
self.peek_token_lexeme_and_position()
|
||||||
|
&& next_token != Token::RightParenthesis
|
||||||
|
{
|
||||||
|
if next_token == Token::StringLiteral {
|
||||||
|
self.advance();
|
||||||
|
let string_value = self.unescape_string_literal(next_token_lexeme);
|
||||||
|
editor_specifiers.push(self.arena.alloc_node_at(
|
||||||
|
VarEditorSpecifier::String(string_value),
|
||||||
|
next_token_position,
|
||||||
|
));
|
||||||
|
} else if let Some(specifier_identifier) =
|
||||||
|
Self::identifier_token_from_token(next_token, next_token_position)
|
||||||
|
{
|
||||||
|
self.advance();
|
||||||
|
editor_specifiers.push(self.arena.alloc_node_at(
|
||||||
|
VarEditorSpecifier::Identifier(specifier_identifier),
|
||||||
|
next_token_position,
|
||||||
|
));
|
||||||
|
} else {
|
||||||
|
self.make_error_here(ParseErrorKind::VarSpecNotIdentifier)
|
||||||
|
.sync_error_until(self, SyncLevel::ListSeparator)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
// Detailed recovery is not worthwhile here;
|
||||||
|
// stop once list structure becomes unclear.
|
||||||
|
if !self.eat(Token::Comma) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.ensure_forward_progress(next_token_position);
|
||||||
|
}
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::VarSpecsMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
Some(editor_specifiers)
|
||||||
|
}
|
||||||
|
}
|
||||||
172
rottlib/src/parser/grammar/declarations/variable_declarators.rs
Normal file
172
rottlib/src/parser/grammar/declarations/variable_declarators.rs
Normal file
@ -0,0 +1,172 @@
|
|||||||
|
//! Parsing of comma-separated variable declarator lists for
|
||||||
|
//! Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! Extends original `UnrealScript` by allowing array-size expressions and
|
||||||
|
//! declarator initializers.
|
||||||
|
|
||||||
|
#![allow(clippy::option_if_let_else)]
|
||||||
|
|
||||||
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{AstSpan, OptionalExpression, VariableDeclarator, VariableDeclaratorRef};
|
||||||
|
use crate::lexer::{Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
/// Tracks what the declarator-list parser expects next, so that missing
/// declarators and missing separators can be reported precisely.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
enum VariableDeclaratorParseState {
    /// The next token should begin a declarator.
    ExpectingDeclarator,
    /// The next token should be a `,` separator (or a terminating `;`).
    ExpectingSeparator,
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a comma-separated list of variable declarators.
    ///
    /// Accepts optional array-size expressions and `=` initializers.
    #[must_use]
    pub(crate) fn parse_variable_declarators(
        &mut self,
    ) -> ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>> {
        use VariableDeclaratorParseState::{ExpectingDeclarator, ExpectingSeparator};

        let mut declarators = self.arena.vec();
        let mut parser_state = ExpectingDeclarator;
        while let Some((next_token, next_token_position)) = self.peek_token_and_position() {
            match (parser_state, next_token) {
                // A declarator was expected, but the list ended immediately.
                (ExpectingDeclarator, Token::Semicolon) => {
                    self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
                    return declarators;
                }
                // Stray comma where a declarator was expected.
                (ExpectingDeclarator, Token::Comma) => {
                    if self
                        .recover_empty_variable_declarator(next_token_position)
                        .is_break()
                    {
                        return declarators;
                    }
                }
                (ExpectingDeclarator, _) => {
                    if self
                        .parse_variable_declarator_into(&mut declarators)
                        .is_break()
                    {
                        // Breaking means we've failed to parse declarator
                        self.report_error_here(ParseErrorKind::DeclEmptyVariableDeclarations);
                        break;
                    }
                    parser_state = ExpectingSeparator;
                }
                (ExpectingSeparator, Token::Comma) => {
                    self.advance();
                    parser_state = ExpectingDeclarator;
                }
                // `;` terminates the list; it is left for the caller.
                (ExpectingSeparator, Token::Semicolon) => break,
                // Missing `,` between two declarators.
                (ExpectingSeparator, _) => {
                    if self
                        .recover_missing_variable_declarator_separator(
                            next_token_position,
                            &mut declarators,
                        )
                        .is_break()
                    {
                        break;
                    }
                }
            }
            // Guard against match arms that consumed nothing.
            self.ensure_forward_progress(next_token_position);
        }
        // In case of reaching EOF here, it does not matter if we emit
        // an additional diagnostic.
        // The caller is expected to report the more relevant enclosing error.
        declarators
    }

    /// Recovers from a stray `,` where a declarator was expected.
    ///
    /// Consumes the entire run of consecutive commas, reports a single error
    /// covering it, and returns [`ControlFlow::Break`] when the list has
    /// effectively ended (a `;` or end of file follows).
    fn recover_empty_variable_declarator(
        &mut self,
        error_start_position: TokenPosition,
    ) -> ControlFlow<()> {
        while self.peek_token() == Some(Token::Comma) {
            self.advance();
        }
        self.make_error_here(ParseErrorKind::DeclEmptyVariableDeclarations)
            .widen_error_span_from(error_start_position)
            .report_error(self);
        if matches!(self.peek_token(), Some(Token::Semicolon) | None) {
            ControlFlow::Break(())
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Parses a single declarator and appends it to `declarators`.
    ///
    /// Returns [`ControlFlow::Break`] when the declarator could not be parsed
    /// even after statement-level recovery.
    fn parse_variable_declarator_into(
        &mut self,
        declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    ) -> ControlFlow<()> {
        if let Some(parsed_declarator) = self
            .parse_variable_declarator()
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        {
            declarators.push(parsed_declarator);
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(())
        }
    }

    /// Recovers from a missing `,` between two declarators.
    ///
    /// Attempts to parse the next declarator anyway; on success, reports
    /// a missing-separator error and keeps the declarator. Returns
    /// [`ControlFlow::Break`] when the next tokens do not form a declarator.
    fn recover_missing_variable_declarator_separator(
        &mut self,
        error_start_position: TokenPosition,
        declarators: &mut ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>,
    ) -> ControlFlow<()> {
        if let Some(parsed_declarator) = self
            .parse_variable_declarator()
            .widen_error_span_from(error_start_position)
            .sync_error_until(self, SyncLevel::Statement)
            .ok_or_report(self)
        {
            self.make_error_here(ParseErrorKind::DeclNoSeparatorBetweenVariableDeclarations)
                .widen_error_span_from(error_start_position)
                .report_error(self);
            declarators.push(parsed_declarator);
            ControlFlow::Continue(())
        } else {
            ControlFlow::Break(())
        }
    }

    /// Parses one declarator: a name, optionally followed by `[size]` and/or
    /// `= initializer`.
    fn parse_variable_declarator(
        &mut self,
    ) -> ParseResult<'src, 'arena, VariableDeclaratorRef<'src, 'arena>> {
        let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?;
        let array_size = self.parse_optional_array_size();
        let initializer = self.parse_optional_variable_initializer();
        // The declarator span starts at the name and runs through whatever
        // optional parts were consumed.
        let span = AstSpan::range(name.0, self.last_consumed_position_or_start());
        Ok(self.arena.alloc_node(
            VariableDeclarator {
                name,
                initializer,
                array_size,
            },
            span,
        ))
    }

    /// Parses an optional `[size]` suffix; `None` when no `[` follows.
    fn parse_optional_array_size(&mut self) -> OptionalExpression<'src, 'arena> {
        if !self.eat(Token::LeftBracket) {
            return None;
        }
        let array_size_expression = self.parse_expression();
        self.expect(
            Token::RightBracket,
            ParseErrorKind::DeclExpectedRightBracketAfterArraySize,
        )
        .sync_error_at(self, SyncLevel::CloseBracket)
        .report_error(self);
        Some(array_size_expression)
    }

    /// Parses an optional `= initializer`; `None` when no `=` follows.
    fn parse_optional_variable_initializer(&mut self) -> OptionalExpression<'src, 'arena> {
        self.eat(Token::Assign).then(|| self.parse_expression())
    }
}
|
||||||
109
rottlib/src/parser/grammar/expression/block.rs
Normal file
109
rottlib/src/parser/grammar/expression/block.rs
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
//! Block-body parsing for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! Provides shared routines for parsing `{ ... }`-delimited bodies used in
|
||||||
|
//! function, loop, state, and similar constructs after the opening `{`
|
||||||
|
//! has been consumed.
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{AstSpan, BlockBody, Expression, ExpressionRef, Statement, StatementRef};
|
||||||
|
use crate::lexer::{Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, Parser};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
    /// Parses a `{ ... }` block after the opening `{` has been consumed.
    ///
    /// Consumes tokens until the matching `}` and returns an
    /// [`Expression::Block`] whose span covers the entire block, from
    /// `opening_brace_position` to the closing `}`.
    ///
    /// On premature end-of-file, returns a best-effort block.
    #[must_use]
    pub(crate) fn parse_block_tail(
        &mut self,
        opening_brace_position: TokenPosition,
    ) -> ExpressionRef<'src, 'arena> {
        let BlockBody { statements, span } =
            self.parse_braced_block_statements_tail(opening_brace_position);
        self.arena.alloc_node(Expression::Block(statements), span)
    }

    /// Parses a `{ ... }` block after the opening `{` has been consumed.
    ///
    /// Consumes tokens until the matching `}` and returns the contained
    /// statements together with a span that covers the entire block, from
    /// `opening_brace_position` to the closing `}`.
    ///
    /// On premature end-of-file, returns a best-effort statement list and span.
    #[must_use]
    pub(crate) fn parse_braced_block_statements_tail(
        &mut self,
        opening_brace_position: TokenPosition,
    ) -> BlockBody<'src, 'arena> {
        let mut statements = self.arena.vec();
        while let Some((token, token_position)) = self.peek_token_and_position() {
            if token == Token::RightBrace {
                self.advance(); // '}'
                let span = AstSpan::range(opening_brace_position, token_position);
                return BlockBody { statements, span };
            }
            self.parse_next_block_item_into(&mut statements);
            // Guard against an item parser that consumed nothing.
            self.ensure_forward_progress(token_position);
        }
        // Reached EOF without a closing `}`
        self.report_error_here(ParseErrorKind::BlockMissingClosingBrace);
        let span = AstSpan::range(
            opening_brace_position,
            self.last_consumed_position_or_start(),
        );
        BlockBody { statements, span }
    }

    /// Parses one statement inside a `{ ... }` block and appends it to
    /// `statements`.
    ///
    /// This method never consumes the closing `}` and is only meant to be
    /// called while parsing inside a block. It always appends at least one
    /// statement, even in the presence of syntax errors.
    pub(crate) fn parse_next_block_item_into(
        &mut self,
        statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
    ) {
        // Anything that is not a recognized statement form is parsed as
        // an expression statement.
        let mut next_statement = self.parse_statement().unwrap_or_else(|| {
            let next_expression = self.parse_expression();
            let next_expression_span = *next_expression.span();
            self.arena
                .alloc_node(Statement::Expression(next_expression), next_expression_span)
        });
        // For the statement kinds selected by `statement_needs_semicolon`,
        // a trailing `;` (when present) is consumed here and folded into the
        // statement's span rather than left for the block loop.
        if statement_needs_semicolon(&next_statement)
            && let Some((Token::Semicolon, semicolon_position)) = self.peek_token_and_position()
        {
            next_statement.span_mut().extend_to(semicolon_position);
            self.advance(); // ';'
        }
        statements.push(next_statement);
    }
}
|
||||||
|
|
||||||
|
fn statement_needs_semicolon(statement: &Statement) -> bool {
|
||||||
|
use Statement::{Empty, Error, Expression, Function, Label, LocalVariableDeclaration};
|
||||||
|
match statement {
|
||||||
|
Empty | Label(_) | Error | Function(_) => false,
|
||||||
|
Expression(expression) => expression_needs_semicolon(expression),
|
||||||
|
LocalVariableDeclaration { .. } => true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn expression_needs_semicolon(expression: &Expression) -> bool {
|
||||||
|
use Expression::{Block, DoUntil, Error, For, ForEach, If, Switch, While};
|
||||||
|
matches!(
|
||||||
|
expression,
|
||||||
|
Block { .. }
|
||||||
|
| If { .. }
|
||||||
|
| While { .. }
|
||||||
|
| DoUntil { .. }
|
||||||
|
| ForEach { .. }
|
||||||
|
| For { .. }
|
||||||
|
| Switch { .. }
|
||||||
|
| Error
|
||||||
|
)
|
||||||
|
}
|
||||||
446
rottlib/src/parser/grammar/expression/control_flow.rs
Normal file
446
rottlib/src/parser/grammar/expression/control_flow.rs
Normal file
@ -0,0 +1,446 @@
|
|||||||
|
//! Control expression parsing for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! ## Condition parsing and legacy compatibility
|
||||||
|
//!
|
||||||
|
//! Fermented `UnrealScript` allows omitting parentheses `(...)` around the
|
||||||
|
//! condition expression of `if`/`while`/etc. For compatibility with older
|
||||||
|
//! `UnrealScript` code, we also apply a special rule:
|
||||||
|
//!
|
||||||
|
//! If a condition starts with `(`, we parse the condition as exactly the
|
||||||
|
//! matching parenthesized subexpression and stop at its corresponding `)`.
|
||||||
|
//! In other words, `( ... )` must cover the whole condition; trailing tokens
|
||||||
|
//! like `* c == d` are not allowed to continue the condition.
|
||||||
|
//!
|
||||||
|
//! This prevents the parser from accidentally consuming the following
|
||||||
|
//! statement/body as part of the condition in older code such as:
|
||||||
|
//!
|
||||||
|
//! ```unrealscript
|
||||||
|
//! if ( AIController(Controller) != None ) Cross = vect(0,0,0);
|
||||||
|
//! ```
|
||||||
|
//!
|
||||||
|
//! Trade-off: you cannot write `if (a + b) * c == d`;
|
||||||
|
//! write `if ((a + b) * c == d)` or `if d == (a + b) * c` instead.
|
||||||
|
//!
|
||||||
|
//! ## Disambiguation of `for` as loop vs expression
|
||||||
|
//!
|
||||||
|
//! Unlike other control-flow keywords, `for` is disambiguated from functions
//! or variables with the same name. This is done syntactically in
|
||||||
|
//! [`Parser::is_for_loop_header_ahead`]: a `for` token followed by
|
||||||
|
//! a `(` whose contents contain a top-level `;` is unambiguously a loop header.
|
||||||
|
//!
|
||||||
|
//! This rule is lightweight, local, and robust, and mirrors the fixed grammar
|
||||||
|
//! `for (init; condition; step)` without requiring name resolution.
|
||||||
|
//!
|
||||||
|
//! ### Why this is not done for `if` / `while` / `do`
|
||||||
|
//!
|
||||||
|
//! There is no similarly reliable way to discriminate `if`, `while`, or related
//! keywords at this stage of parsing: their parenthesized forms are
|
||||||
|
//! indistinguishable from single argument function calls.
|
||||||
|
//!
|
||||||
|
//! Supporting these keywords as identifiers would complicate parsing
|
||||||
|
//! disproportionately and we always treat them as openers for conditional and
|
||||||
|
//! cycle expressions. This matches common `UnrealScript` usage and intentionally
|
||||||
|
//! drops support for legacy designs in which such names were reused
//! as variables or functions (for example, the `For` function declared
//! in Acedia).
|
||||||
|
//!
|
||||||
|
//! ### But what about `switch`?
|
||||||
|
//!
|
||||||
|
//! `switch` is handled separately because, in existing `UnrealScript` code,
|
||||||
|
//! it may appear either as a keyword-led construct or as an identifier.
|
||||||
|
//!
|
||||||
|
//! Its disambiguation rule is simpler than for `for`: if the next token is
|
||||||
|
//! `(`, `switch` is parsed as a `switch` expression; otherwise it remains
|
||||||
|
//! available as an identifier.
|
||||||
|
//!
|
||||||
|
//! This rule is local and purely syntactic, matching the behavior expected by
|
||||||
|
//! the existing codebase we support. The actual parsing of `switch` expressions
|
||||||
|
//! lives in a separate module because the construct itself is more involved
|
||||||
|
//! than the control-flow forms handled here.
|
||||||
|
|
||||||
|
use crate::ast::{AstSpan, BranchBody, Expression, ExpressionRef};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
    /// Parses a control-flow condition.
    ///
    /// If the next token is `(`, attempts to consume one parenthesized
    /// subexpression and returns it wrapped as [`Expression::Parentheses`].
    /// Otherwise consumes a general expression.
    fn parse_condition(&mut self) -> ExpressionRef<'src, 'arena> {
        // Per the module-level compatibility rule: a condition that starts
        // with `(` is exactly the matching parenthesized subexpression, and
        // parsing stops at its corresponding `)`.
        if let Some((Token::LeftParenthesis, left_parenthesis_position)) =
            self.peek_token_and_position()
        {
            self.advance(); // '('
            let condition_expression = self.parse_expression();
            let right_parenthesis_position = self
                .expect(
                    Token::RightParenthesis,
                    ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
                        left_parenthesis_position,
                    },
                )
                .widen_error_span_from(left_parenthesis_position)
                .sync_error_at(self, SyncLevel::CloseParenthesis)
                // On a missing `)`, fall back to a best-effort end position
                // so the node still gets a usable span.
                .unwrap_or_fallback(self);
            self.arena.alloc_node_between(
                Expression::Parentheses(condition_expression),
                left_parenthesis_position,
                right_parenthesis_position,
            )
        } else {
            self.parse_expression()
        }
    }
|
||||||
|
|
||||||
|
/// Parses a branch body for a control-flow construct.
|
||||||
|
///
|
||||||
|
/// Normalizes the following source forms into a [`BranchBody`]:
|
||||||
|
///
|
||||||
|
/// - empty body with semicolon: `if (cond);`
|
||||||
|
/// - empty body before a closing `}`: `if (cond) }`
|
||||||
|
/// - non-empty block body: `if (cond) { ... }`
|
||||||
|
/// - non-empty single-expression body: `if (cond) expr;`
|
||||||
|
///
|
||||||
|
/// For non-block bodies, this method consumes a trailing `;` when present
|
||||||
|
/// and records its position in the returned [`BranchBody`].
|
||||||
|
fn parse_branch_body(&mut self) -> BranchBody<'src, 'arena> {
|
||||||
|
let Some((first_token, first_token_position)) = self.peek_token_and_position() else {
|
||||||
|
let error = self.make_error_here(ParseErrorKind::MissingBranchBody);
|
||||||
|
self.report_error(error);
|
||||||
|
return BranchBody {
|
||||||
|
expression: None,
|
||||||
|
semicolon_position: None,
|
||||||
|
end_anchor_token_position: error.covered_span.token_to,
|
||||||
|
};
|
||||||
|
};
|
||||||
|
// `if (is_condition);`
|
||||||
|
if first_token == Token::Semicolon {
|
||||||
|
self.advance(); // ';'
|
||||||
|
return BranchBody {
|
||||||
|
expression: None,
|
||||||
|
semicolon_position: Some(first_token_position),
|
||||||
|
end_anchor_token_position: first_token_position,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// `{ ... if (is_condition) }`
|
||||||
|
if first_token == Token::RightBrace {
|
||||||
|
return BranchBody {
|
||||||
|
expression: None,
|
||||||
|
semicolon_position: None,
|
||||||
|
// `unwrap` actually triggering is effectively impossible,
|
||||||
|
// because by the time a branch body is parsed, some prior token
|
||||||
|
// (e.g. `if`, `)`, etc.) has already been consumed,
|
||||||
|
// so the parser should have a last-consumed position
|
||||||
|
end_anchor_token_position: self
|
||||||
|
.last_consumed_position()
|
||||||
|
.unwrap_or(first_token_position),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
let branch_expression = self.parse_expression();
|
||||||
|
let end_anchor_token_position = branch_expression.span().token_to;
|
||||||
|
// A block body in `if {...}` or `if {...};` owns its own terminator;
|
||||||
|
// a following `;` does not belong to the branch body.
|
||||||
|
if let Expression::Block(_) = *branch_expression {
|
||||||
|
return BranchBody {
|
||||||
|
expression: Some(branch_expression),
|
||||||
|
semicolon_position: None,
|
||||||
|
end_anchor_token_position,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
// For single-expression bodies, consume a trailing semicolon if present
|
||||||
|
let trailing_semicolon_position = if self.eat(Token::Semicolon) {
|
||||||
|
self.last_consumed_position()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
BranchBody {
|
||||||
|
expression: Some(branch_expression),
|
||||||
|
semicolon_position: trailing_semicolon_position,
|
||||||
|
end_anchor_token_position: trailing_semicolon_position
|
||||||
|
.unwrap_or(end_anchor_token_position),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an `if` expression after the `if` keyword.
|
||||||
|
///
|
||||||
|
/// The resulting [`Expression::If`] spans from `if_keyword_position` to the
|
||||||
|
/// end of the `if` body, or to the end of the `else` body if one is
|
||||||
|
/// present.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_if_tail(
|
||||||
|
&mut self,
|
||||||
|
if_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let condition = self.parse_condition();
|
||||||
|
let body = self.parse_branch_body();
|
||||||
|
|
||||||
|
let (else_body, if_end_position) = if self.peek_keyword() == Some(Keyword::Else) {
|
||||||
|
self.advance(); // 'else'
|
||||||
|
let else_body = self.parse_branch_body();
|
||||||
|
let else_body_end = else_body.end_anchor_token_position;
|
||||||
|
(Some(else_body), else_body_end)
|
||||||
|
} else {
|
||||||
|
(None, body.end_anchor_token_position)
|
||||||
|
};
|
||||||
|
|
||||||
|
let span = AstSpan::range(if_keyword_position, if_end_position);
|
||||||
|
self.arena.alloc_node(
|
||||||
|
Expression::If {
|
||||||
|
condition,
|
||||||
|
body,
|
||||||
|
else_body,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `while` expression after the `while` keyword.
|
||||||
|
///
|
||||||
|
/// The resulting [`Expression::While`] spans from `while_keyword_position`
|
||||||
|
/// to the end of its body.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_while_tail(
|
||||||
|
&mut self,
|
||||||
|
while_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let condition = self.parse_condition();
|
||||||
|
let body = self.parse_branch_body();
|
||||||
|
let span = AstSpan::range(while_keyword_position, body.end_anchor_token_position);
|
||||||
|
self.arena
|
||||||
|
.alloc_node(Expression::While { condition, body }, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `do ... until ...` expression after the `do` keyword.
|
||||||
|
///
|
||||||
|
/// The resulting [`Expression::DoUntil`] spans from `do_keyword_position`
|
||||||
|
/// to the end of the condition.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_do_until_tail(
|
||||||
|
&mut self,
|
||||||
|
do_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let body = self.parse_branch_body();
|
||||||
|
|
||||||
|
let condition = if self
|
||||||
|
.expect_keyword(Keyword::Until, ParseErrorKind::DoMissingUntil)
|
||||||
|
.widen_error_span_from(do_keyword_position)
|
||||||
|
.report_error(self)
|
||||||
|
{
|
||||||
|
crate::arena::ArenaNode::new_in(
|
||||||
|
Expression::Error,
|
||||||
|
AstSpan::new(body.end_anchor_token_position),
|
||||||
|
self.arena,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
self.parse_condition()
|
||||||
|
};
|
||||||
|
let span = AstSpan::range(do_keyword_position, condition.span().token_to);
|
||||||
|
self.arena
|
||||||
|
.alloc_node(Expression::DoUntil { condition, body }, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `foreach` expression after the `foreach` keyword.
|
||||||
|
///
|
||||||
|
/// The iterator part is consumed as a regular expression, followed by a
|
||||||
|
/// branch body.
|
||||||
|
///
|
||||||
|
/// The resulting [`Expression::ForEach`] spans from
|
||||||
|
/// `foreach_keyword_position` to the end of the body.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_foreach_tail(
|
||||||
|
&mut self,
|
||||||
|
foreach_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
// UnrealScript `foreach` iterator expressions are simple enough that
|
||||||
|
// they do not need the special parenthesized-condition handling used by
|
||||||
|
// `parse_condition()`.
|
||||||
|
let iterated_expression = self.parse_expression();
|
||||||
|
|
||||||
|
let body = self.parse_branch_body();
|
||||||
|
let span = AstSpan::range(foreach_keyword_position, body.end_anchor_token_position);
|
||||||
|
self.arena.alloc_node(
|
||||||
|
Expression::ForEach {
|
||||||
|
iterated_expression,
|
||||||
|
body,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the upcoming tokens have the syntactic shape of a
|
||||||
|
/// `for (...)` header.
|
||||||
|
///
|
||||||
|
/// More precisely, this returns `true` iff the next token is `(` and a
|
||||||
|
/// top-level `;` appears before the matching `)` is closed or input ends.
|
||||||
|
///
|
||||||
|
/// This is used only for loop-vs-identifier disambiguation.
|
||||||
|
pub(crate) fn is_for_loop_header_ahead(&mut self) -> bool {
|
||||||
|
if self.peek_token() != Some(Token::LeftParenthesis) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
let mut nesting_depth: usize = 1;
|
||||||
|
let mut lookahead_token_offset: usize = 1;
|
||||||
|
while let Some(next_token) = self.peek_token_at(lookahead_token_offset) {
|
||||||
|
match next_token {
|
||||||
|
Token::LeftParenthesis => nesting_depth += 1,
|
||||||
|
Token::RightParenthesis => {
|
||||||
|
if nesting_depth <= 1 {
|
||||||
|
// End of the immediate `for (...)` group without a
|
||||||
|
// top-level `;`: not a loop header.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
nesting_depth -= 1;
|
||||||
|
}
|
||||||
|
Token::Semicolon if nesting_depth == 1 => return true,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
lookahead_token_offset += 1;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `for` expression after the `for` keyword.
|
||||||
|
///
|
||||||
|
/// This method expects the standard header shape
|
||||||
|
/// `for (initialization; condition; step)` and then parses a branch body.
|
||||||
|
///
|
||||||
|
/// Each header component may be omitted. The resulting [`Expression::For`]
|
||||||
|
/// spans from `for_keyword_position` to the end of the body.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_for_tail(
|
||||||
|
&mut self,
|
||||||
|
for_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
// This path is expected to be entered only after
|
||||||
|
// `is_for_loop_header_ahead()`, so the opening `(` and at least one
|
||||||
|
// top-level `;` should already be structurally guaranteed.
|
||||||
|
self.expect(
|
||||||
|
Token::LeftParenthesis,
|
||||||
|
ParseErrorKind::ForMissingOpeningParenthesis,
|
||||||
|
)
|
||||||
|
.widen_error_span_from(for_keyword_position)
|
||||||
|
.report_error(self);
|
||||||
|
|
||||||
|
let initialization = if self.peek_token() == Some(Token::Semicolon) {
|
||||||
|
self.advance();
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let init = self.parse_expression();
|
||||||
|
self.expect(
|
||||||
|
Token::Semicolon,
|
||||||
|
ParseErrorKind::ForMissingInitializationSemicolon,
|
||||||
|
)
|
||||||
|
.report_error(self);
|
||||||
|
Some(init)
|
||||||
|
};
|
||||||
|
|
||||||
|
let condition = if self.peek_token() == Some(Token::Semicolon) {
|
||||||
|
self.advance();
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let condition = self.parse_expression();
|
||||||
|
self.expect(
|
||||||
|
Token::Semicolon,
|
||||||
|
ParseErrorKind::ForMissingConditionSemicolon,
|
||||||
|
)
|
||||||
|
.report_error(self);
|
||||||
|
Some(condition)
|
||||||
|
};
|
||||||
|
|
||||||
|
let step = if self.peek_token() == Some(Token::RightParenthesis) {
|
||||||
|
self.advance();
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let step = self.parse_expression();
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::ForMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.widen_error_span_from(for_keyword_position)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
Some(step)
|
||||||
|
};
|
||||||
|
|
||||||
|
let body = self.parse_branch_body();
|
||||||
|
let span = AstSpan::range(for_keyword_position, body.end_anchor_token_position);
|
||||||
|
self.arena.alloc_node(
|
||||||
|
Expression::For {
|
||||||
|
initialization,
|
||||||
|
condition,
|
||||||
|
step,
|
||||||
|
body,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the continuation of a `return` expression after its keyword.
|
||||||
|
///
|
||||||
|
/// If the next token is not `;`, consumes a return value expression.
|
||||||
|
/// The terminating `;` is not consumed here.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_return_tail(
|
||||||
|
&mut self,
|
||||||
|
return_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let (value, span) = if self.peek_token() == Some(Token::Semicolon) {
|
||||||
|
(None, AstSpan::new(return_keyword_position))
|
||||||
|
} else {
|
||||||
|
let returned_value = self.parse_expression();
|
||||||
|
let span = AstSpan::range(return_keyword_position, returned_value.span().token_to);
|
||||||
|
(Some(returned_value), span)
|
||||||
|
};
|
||||||
|
self.arena.alloc_node(Expression::Return(value), span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the continuation of a `break` expression after its keyword.
|
||||||
|
///
|
||||||
|
/// If the next token is not `;`, consumes a break value expression.
|
||||||
|
/// The terminating `;` is not consumed here.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_break_tail(
|
||||||
|
&mut self,
|
||||||
|
break_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let (value, span) = if self.peek_token() == Some(Token::Semicolon) {
|
||||||
|
(None, AstSpan::new(break_keyword_position))
|
||||||
|
} else {
|
||||||
|
let returned_value = self.parse_expression();
|
||||||
|
let span = AstSpan::range(break_keyword_position, returned_value.span().token_to);
|
||||||
|
(Some(returned_value), span)
|
||||||
|
};
|
||||||
|
self.arena.alloc_node(Expression::Break(value), span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses the continuation of a `goto` expression after its keyword.
|
||||||
|
///
|
||||||
|
/// Accepts either a name literal or an identifier as the target label.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_goto_tail(
|
||||||
|
&mut self,
|
||||||
|
goto_keyword_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
if let Some((label_token, label_position)) = self.peek_token_and_position()
|
||||||
|
&& (label_token == Token::NameLiteral || label_token == Token::Identifier)
|
||||||
|
{
|
||||||
|
self.advance();
|
||||||
|
return self.arena.alloc_node_between(
|
||||||
|
Expression::Goto(label_position),
|
||||||
|
goto_keyword_position,
|
||||||
|
label_position,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
self.make_error_here(ParseErrorKind::GotoMissingLabel)
|
||||||
|
.widen_error_span_from(goto_keyword_position)
|
||||||
|
.sync_error_until(self, SyncLevel::Statement)
|
||||||
|
.report_error(self);
|
||||||
|
crate::arena::ArenaNode::new_in(
|
||||||
|
Expression::Error,
|
||||||
|
AstSpan::new(goto_keyword_position),
|
||||||
|
self.arena,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
76
rottlib/src/parser/grammar/expression/identifier.rs
Normal file
76
rottlib/src/parser/grammar/expression/identifier.rs
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
//! Identifier parsing for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! Provides shared routines for parsing both regular and qualified identifiers,
|
||||||
|
//! e.g. `KFChar.ZombieClot`.
|
||||||
|
|
||||||
|
use crate::arena::{self, ArenaVec};
|
||||||
|
use crate::ast::{AstSpan, IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef};
|
||||||
|
use crate::lexer::{self, Token};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses an identifier.
|
||||||
|
///
|
||||||
|
/// On failure (unexpected end-of-file or a token that cannot be used as an
|
||||||
|
/// identifier), produces `invalid_identifier_error_kind`.
|
||||||
|
pub(crate) fn parse_identifier(
|
||||||
|
&mut self,
|
||||||
|
invalid_identifier_error_kind: ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, IdentifierToken> {
|
||||||
|
let (token, token_position) =
|
||||||
|
self.require_token_and_position(invalid_identifier_error_kind)?;
|
||||||
|
let identifier = Parser::identifier_token_from_token(token, token_position)
|
||||||
|
.ok_or_else(|| self.make_error_here(invalid_identifier_error_kind))?;
|
||||||
|
self.advance();
|
||||||
|
Ok(identifier)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns an [`IdentifierToken`] for `token` if it is valid as an
|
||||||
|
/// identifier name.
|
||||||
|
///
|
||||||
|
/// This helper performs only token-to-identifier validation/wrapping;
|
||||||
|
/// it does not consume input from the parser.
|
||||||
|
pub(crate) fn identifier_token_from_token(
|
||||||
|
token: Token,
|
||||||
|
token_position: lexer::TokenPosition,
|
||||||
|
) -> Option<IdentifierToken> {
|
||||||
|
token
|
||||||
|
.is_valid_identifier_name()
|
||||||
|
.then_some(IdentifierToken(token_position))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a qualified (dot-separated) identifier path,
|
||||||
|
/// e.g. `KFChar.ZombieClot`.
|
||||||
|
///
|
||||||
|
/// This is used for name paths where each segment must be
|
||||||
|
/// a valid identifier and segments are separated by `.` tokens.
|
||||||
|
///
|
||||||
|
/// On failure produces an error of specified [`ParseErrorKind`]
|
||||||
|
/// `invalid_identifier_error_kind`.
|
||||||
|
pub(crate) fn parse_qualified_identifier(
|
||||||
|
&mut self,
|
||||||
|
invalid_identifier_error_kind: ParseErrorKind,
|
||||||
|
) -> ParseResult<'src, 'arena, QualifiedIdentifierRef<'arena>> {
|
||||||
|
let head = self.parse_identifier(invalid_identifier_error_kind)?;
|
||||||
|
let mut tail = None;
|
||||||
|
|
||||||
|
let span_start = head.0;
|
||||||
|
let mut span_end = span_start;
|
||||||
|
while self.peek_token() == Some(Token::Period) {
|
||||||
|
self.advance(); // '.'
|
||||||
|
let next_segment = self
|
||||||
|
.parse_identifier(invalid_identifier_error_kind)
|
||||||
|
.widen_error_span_from(head.0)?;
|
||||||
|
span_end = next_segment.0;
|
||||||
|
|
||||||
|
let tail_vec = tail.get_or_insert_with(|| ArenaVec::new_in(self.arena));
|
||||||
|
tail_vec.push(next_segment);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(arena::ArenaNode::new_in(
|
||||||
|
QualifiedIdentifier { head, tail },
|
||||||
|
AstSpan::range(span_start, span_end),
|
||||||
|
self.arena,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
123
rottlib/src/parser/grammar/expression/literals.rs
Normal file
123
rottlib/src/parser/grammar/expression/literals.rs
Normal file
@ -0,0 +1,123 @@
|
|||||||
|
//! Literal decoding for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! This module defines the semantic rules for interpreting literal tokens
|
||||||
|
//! produced by the lexer. It is responsible only for *decoding* the textual
|
||||||
|
//! representation of literals into their internal values.
|
||||||
|
//!
|
||||||
|
//! The rules implemented here intentionally mirror the quirks of
|
||||||
|
//! Unreal Engine 2’s `UnrealScript`.
|
||||||
|
|
||||||
|
use crate::parser::{ParseErrorKind, ParseResult};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
/// Decodes an integer literal string into [`u128`].
|
||||||
|
///
|
||||||
|
/// Syntax:
|
||||||
|
/// - Optional base prefix: `0b` | `0o` | `0x` (case-insensitive).
|
||||||
|
/// No prefix -> decimal.
|
||||||
|
/// - Digits must match the base (`0-1`/`0-7`/`0-9A-F`).
|
||||||
|
/// - Underscores are allowed and ignored (e.g., `1_000`, `0xDE_AD`).
|
||||||
|
/// - No leading sign; parsed as a non-negative magnitude.
|
||||||
|
/// - Must fit within [`u128`].
|
||||||
|
///
|
||||||
|
/// Examples: `42`, `0b1010_0011`, `0o755`, `0xDEAD_BEEF`.
|
||||||
|
///
|
||||||
|
/// On failure, returns [`ParseErrorKind::InvalidNumericLiteral`] at
|
||||||
|
/// the parser's current cursor position.
|
||||||
|
pub(crate) fn decode_integer_literal(&self, literal: &str) -> ParseResult<'src, 'arena, u128> {
|
||||||
|
let (base, content) = match literal.split_at_checked(2) {
|
||||||
|
Some(("0b" | "0B", stripped)) => (2, stripped),
|
||||||
|
Some(("0o" | "0O", stripped)) => (8, stripped),
|
||||||
|
Some(("0x" | "0X", stripped)) => (16, stripped),
|
||||||
|
_ => (10, literal),
|
||||||
|
};
|
||||||
|
let digits_without_underscores = content.replace('_', "");
|
||||||
|
u128::from_str_radix(&digits_without_underscores, base)
|
||||||
|
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decodes a float literal as `f64`, following the permissive and only
|
||||||
|
/// partially documented behavior of `UnrealScript`.
|
||||||
|
///
|
||||||
|
/// Unreal Engine 2 does not define a precise and consistent set of rules
|
||||||
|
/// for float literals and the original compiler contains several quirks.
|
||||||
|
/// Because of this, we default to normalizing the text using a small set of
|
||||||
|
/// UnrealScript-specific rules and then parse the result using rust's
|
||||||
|
/// `f64` parser.
|
||||||
|
///
|
||||||
|
/// Rules implemented here:
|
||||||
|
/// - Only decimal floats and special literals (e.g. `NaN`, `inf`)
|
||||||
|
/// are supported (no hex or binary formats).
|
||||||
|
/// - A single trailing `f` or `F`, if present, is removed before parsing.
|
||||||
|
/// - The literal text is scanned for periods (`.`). If a second period
|
||||||
|
/// is found, everything from that second `.` onward is discarded.
|
||||||
|
///
|
||||||
|
/// Examples:
|
||||||
|
/// * `1.2.3e4` becomes `1.2`
|
||||||
|
/// * `1.2e3.4` becomes `1.2e3`
|
||||||
|
///
|
||||||
|
/// - After this truncation step, the remaining text is interpreted as a
|
||||||
|
/// normal rust `f64` literal. This means it may contain digits, at
|
||||||
|
/// most one decimal point, and an optional exponent part (for example
|
||||||
|
/// `e3` or `E-2`), but it must otherwise follow rust's `f64` syntax.
|
||||||
|
/// Underscores, spaces, and other unsupported characters cause a
|
||||||
|
/// parse error.
|
||||||
|
///
|
||||||
|
/// On failure, this function returns
|
||||||
|
/// [`ParseErrorKind::InvalidNumericLiteral`] at the current parser
|
||||||
|
/// position.
|
||||||
|
pub(crate) fn decode_float_literal(&self, literal: &str) -> ParseResult<'src, 'arena, f64> {
|
||||||
|
let content = literal
|
||||||
|
.strip_suffix('f')
|
||||||
|
.or_else(|| literal.strip_suffix('F'))
|
||||||
|
.unwrap_or(literal);
|
||||||
|
// Truncate after the second '.', matching UnrealScript behavior
|
||||||
|
let content = content
|
||||||
|
.match_indices('.')
|
||||||
|
.nth(1)
|
||||||
|
.and_then(|(period_index, _)| content.get(..period_index))
|
||||||
|
.unwrap_or(content);
|
||||||
|
content
|
||||||
|
.parse::<f64>()
|
||||||
|
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unescapes a tokenized string literal into an arena string.
|
||||||
|
///
|
||||||
|
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
||||||
|
/// Unknown escapes drop the backslash and emit the character unchanged
|
||||||
|
/// (`UnrealScript` behavior).
|
||||||
|
/// If `raw_string` ends with a trailing `\` (which should not happen for
|
||||||
|
/// well-formed tokens), that backslash is simply ignored.
|
||||||
|
///
|
||||||
|
/// This function assumes `raw_string` is the token text without surrounding
|
||||||
|
/// quotes.
|
||||||
|
pub(crate) fn unescape_string_literal(
|
||||||
|
&self,
|
||||||
|
raw_string: &str,
|
||||||
|
) -> crate::arena::ArenaString<'arena> {
|
||||||
|
let mut buffer = String::with_capacity(raw_string.len());
|
||||||
|
let mut characters = raw_string.chars();
|
||||||
|
while let Some(next_character) = characters.next() {
|
||||||
|
if next_character == '\\' {
|
||||||
|
// Under the lexer contract, string tokens do not end with a lone
|
||||||
|
// backslash, so there is always a following character. If this
|
||||||
|
// invariant is broken, the final '\' is simply ignored here.
|
||||||
|
if let Some(escaped_character) = characters.next() {
|
||||||
|
match escaped_character {
|
||||||
|
'n' => buffer.push('\n'),
|
||||||
|
't' => buffer.push('\t'),
|
||||||
|
'"' => buffer.push('"'),
|
||||||
|
'\\' => buffer.push('\\'),
|
||||||
|
// Simply leaving the escaped character matches
|
||||||
|
// UnrealScript behavior.
|
||||||
|
unrecognized_escape_char => buffer.push(unrecognized_escape_char),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buffer.push(next_character);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.arena.string(&buffer)
|
||||||
|
}
|
||||||
|
}
|
||||||
32
rottlib/src/parser/grammar/expression/mod.rs
Normal file
32
rottlib/src/parser/grammar/expression/mod.rs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
//! Expression parsing for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! This module group implements the language's expression parser around a
|
||||||
|
//! Pratt-style core. It is split into small submodules by role: precedence,
|
||||||
|
//! identifiers, literals, selectors, block bodies, keyword-led/control-flow
|
||||||
|
//! forms, primary-expression dispatch, and the Pratt driver itself.
|
||||||
|
//!
|
||||||
|
//! The parser is designed to keep building a best-effort AST on malformed
|
||||||
|
//! input. Syntax problems are reported through diagnostics, while committed
|
||||||
|
//! parsers recover locally and return fallback nodes or partial structures when
|
||||||
|
//! necessary.
|
||||||
|
//!
|
||||||
|
//! ## Expression layering
|
||||||
|
//!
|
||||||
|
//! The parser distinguishes several layers of expression parsing:
|
||||||
|
//!
|
||||||
|
//! - **primaries**: forms that can be parsed directly from the current token,
|
||||||
|
//! without an already parsed left-hand side;
|
||||||
|
//! - **selectors**: suffix continuations such as member access, indexing, and
|
||||||
|
//! calls, which require a left-hand side;
|
||||||
|
//! - **prefix / postfix / infix operators**: handled by the Pratt parser using
|
||||||
|
//! precedence ranks.
|
||||||
|
|
||||||
|
mod block; // `{ ... }` block-body parsing and block/expression item handling.
|
||||||
|
mod control_flow; // `if`, `while`, `do`, `foreach`, `for`, `return`, etc.
|
||||||
|
mod identifier; // Identifier and qualified-name parsing helpers.
|
||||||
|
mod literals; // Literal decoding and literal-specific parsing utilities.
|
||||||
|
mod pratt; // Top-level Pratt driver.
|
||||||
|
mod precedence; // Operator precedence ranks and Pratt binding rules.
|
||||||
|
mod primary; // Primary-expression parsing and keyword-vs-identifier dispatch.
|
||||||
|
mod selectors; // Suffix continuations: member access, indexing, and calls.
|
||||||
|
mod switch; // `switch (...) { ... }` parsing and arm/body recovery.
|
||||||
194
rottlib/src/parser/grammar/expression/pratt.rs
Normal file
194
rottlib/src/parser/grammar/expression/pratt.rs
Normal file
@ -0,0 +1,194 @@
|
|||||||
|
//! Core of the expression parser for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! This module implements a Pratt-style parser for the language's expression
|
||||||
|
//! grammar, supporting:
|
||||||
|
//!
|
||||||
|
//! * Primary expressions (see [`crate::parser::primary`] for details on what
|
||||||
|
//! we consider to be a primary expression);
|
||||||
|
//! * Prefix operators;
|
||||||
|
//! * Postfix operators;
|
||||||
|
//! * Infix operators with hard-coded precedence and associativity.
|
||||||
|
//!
|
||||||
|
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
|
||||||
|
//! operators bind. Infix parsing uses the pair of binding powers returned by
|
||||||
|
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
|
||||||
|
//! The parser infrastructure supports both left- and right-associative
|
||||||
|
//! operators, but Fermented `UnrealScript` currently defines only
|
||||||
|
//! left-associative ones.
|
||||||
|
//!
|
||||||
|
//! ## Postfix operator vs "selectors"
|
||||||
|
//!
|
||||||
|
//! Everywhere here we distinguish *selectors* like field accessor `.`,
|
||||||
|
//! function call `()` or array indices `[]` from other *postfix operators*
|
||||||
|
//! as they:
|
||||||
|
//!
|
||||||
|
//! 1. Have significantly different semantic meaning;
|
||||||
|
//! 2. Are not considered operators from `UnrealScript`'s viewpoint
|
||||||
|
//! (e.g. cannot be overloaded).
|
||||||
|
//!
|
||||||
|
//! ## See also
|
||||||
|
//!
|
||||||
|
//! - [`parser::Parser::parse_expression`] - main entry point
|
||||||
|
//! - [`PrecedenceRank`] - operator binding strengths
|
||||||
|
//! - [`super::precedence`] - operator precedence definitions
|
||||||
|
|
||||||
|
use crate::ast::{self, Expression, ExpressionRef};
|
||||||
|
use crate::parser::{self, Parser, ResultRecoveryExt};
|
||||||
|
|
||||||
|
pub use super::precedence::PrecedenceRank;
|
||||||
|
|
||||||
|
/// Returns whether postfix operators like `++` and `--` are disallowed
|
||||||
|
/// after this expression.
|
||||||
|
///
|
||||||
|
/// This restriction applies only to postfix operators. Selectors such as
|
||||||
|
/// field access `.x`, indexing `[i]`, and calls `(args)` remain allowed.
|
||||||
|
fn forbids_postfix_operators(expression: &ExpressionRef<'_, '_>) -> bool {
|
||||||
|
matches!(
|
||||||
|
**expression,
|
||||||
|
Expression::If { .. }
|
||||||
|
| Expression::While { .. }
|
||||||
|
| Expression::DoUntil { .. }
|
||||||
|
| Expression::For { .. }
|
||||||
|
| Expression::ForEach { .. }
|
||||||
|
| Expression::Switch { .. }
|
||||||
|
| Expression::Block { .. }
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses an expression.
|
||||||
|
///
|
||||||
|
/// Always returns some expression node; any syntax errors are reported
|
||||||
|
/// through the parser's diagnostics.
|
||||||
|
#[must_use]
|
||||||
|
pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
|
||||||
|
self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an expression, including only operators with binding power
|
||||||
|
/// at least `min_precedence_rank` (as tight or tighter).
|
||||||
|
fn parse_expression_with_min_precedence_rank(
|
||||||
|
&mut self,
|
||||||
|
min_precedence_rank: PrecedenceRank,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let mut left_hand_side = self
|
||||||
|
.parse_prefix_or_primary()
|
||||||
|
.sync_error_until(self, parser::SyncLevel::Expression)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
left_hand_side = self
|
||||||
|
.parse_selectors_into(left_hand_side)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
// We disallow only postfix operators after expression forms that
|
||||||
|
// represent control-flow or block constructs. Selectors are still
|
||||||
|
// parsed normally.
|
||||||
|
// This avoids ambiguities in cases like:
|
||||||
|
//
|
||||||
|
// ```unrealscript
|
||||||
|
// if test() { do_it(); }
|
||||||
|
// ++ counter;
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// This wasn't a problem in UnrealScript, because such constructs were
|
||||||
|
// never treated as expressions. And it shouldn't be an issue for us
|
||||||
|
// because neither `--` or `++` (the only existing default postfix
|
||||||
|
// operators) make any sense after such expressions anyway.
|
||||||
|
if !forbids_postfix_operators(&left_hand_side) {
|
||||||
|
left_hand_side = self.parse_postfix_into(left_hand_side);
|
||||||
|
}
|
||||||
|
self.parse_infix_into(left_hand_side, min_precedence_rank)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a prefix or primary expression (Pratt parser's "nud" or
/// null denotation).
///
/// Consumes the first token of the expression before dispatching on it.
///
/// # Errors
///
/// Propagates [`parser::ParseErrorKind::MissingExpression`] when no token
/// is available to start an expression.
fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> {
    let (token, token_lexeme, token_position) =
        self.require_token_lexeme_and_position(parser::ParseErrorKind::MissingExpression)?;
    self.advance();
    if let Ok(operator) = ast::PrefixOperator::try_from(token) {
        // In UnrealScript, prefix and postfix operators bind tighter than
        // any infix operators, so we can safely parse the right hand side
        // at the tightest precedence.
        let right_hand_side =
            self.parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST);
        Ok(Expression::new_prefix(
            self.arena,
            token_position,
            operator,
            right_hand_side,
        ))
    } else {
        self.parse_primary_from_current_token(token, token_lexeme, token_position)
    }
}
|
||||||
|
|
||||||
|
/// Parses all postfix operators it can, creating a tree with
|
||||||
|
/// `left_hand_side` as a child.
|
||||||
|
fn parse_postfix_into(
|
||||||
|
&mut self,
|
||||||
|
mut left_hand_side: ExpressionRef<'src, 'arena>,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
while let Some((operator, operator_position)) = self.peek_postfix_with_position() {
|
||||||
|
self.advance();
|
||||||
|
left_hand_side =
|
||||||
|
Expression::new_postfix(self.arena, left_hand_side, operator, operator_position);
|
||||||
|
}
|
||||||
|
left_hand_side
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses infix operators binding at least as tight as
/// `min_precedence_rank`.
///
/// Associativity is encoded by
/// [`super::precedence::infix_precedence_ranks`].
///
/// Stops when the next operator is looser than `min_precedence_rank`.
fn parse_infix_into(
    &mut self,
    mut left_hand_side: ExpressionRef<'src, 'arena>,
    min_precedence_rank: PrecedenceRank,
) -> ExpressionRef<'src, 'arena> {
    while let Some((operator, right_precedence_rank)) =
        self.peek_infix_with_min_precedence_rank(min_precedence_rank)
    {
        self.advance();
        // Recurse at the operator's right binding power; the precedence
        // table makes it one step tighter than the left binding power,
        // which yields left associativity.
        let right_hand_side =
            self.parse_expression_with_min_precedence_rank(right_precedence_rank);
        left_hand_side =
            Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
    }
    left_hand_side
}
|
||||||
|
|
||||||
|
/// Returns the next postfix operator and its position if present.
|
||||||
|
///
|
||||||
|
/// Helper to avoid peeking and mapping twice; used to drive the postfix
|
||||||
|
/// loop without unwraps.
|
||||||
|
fn peek_postfix_with_position(
|
||||||
|
&mut self,
|
||||||
|
) -> Option<(ast::PostfixOperator, crate::lexer::TokenPosition)> {
|
||||||
|
let (token, token_position) = self.peek_token_and_position()?;
|
||||||
|
let Ok(operator) = ast::PostfixOperator::try_from(token) else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
Some((operator, token_position))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If the next token is an infix operator with left binding power at least
|
||||||
|
/// `min_precedence_rank`, returns its operator and the minimum precedence
|
||||||
|
/// rank to use when parsing the right-hand side (i.e. the operator's right
|
||||||
|
/// binding power).
|
||||||
|
///
|
||||||
|
/// Otherwise returns [`None`].
|
||||||
|
fn peek_infix_with_min_precedence_rank(
|
||||||
|
&mut self,
|
||||||
|
min_precedence_rank: PrecedenceRank,
|
||||||
|
) -> Option<(ast::InfixOperator, PrecedenceRank)> {
|
||||||
|
let (left_precedence_rank, operator, right_precedence_rank) = self
|
||||||
|
.peek_token()
|
||||||
|
.and_then(super::precedence::infix_precedence_ranks)?;
|
||||||
|
if left_precedence_rank.is_looser_than(min_precedence_rank) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
Some((operator, right_precedence_rank))
|
||||||
|
}
|
||||||
|
}
|
||||||
93
rottlib/src/parser/grammar/expression/precedence.rs
Normal file
93
rottlib/src/parser/grammar/expression/precedence.rs
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
//! Precedence tables for Fermented `UnrealScript` operators.
|
||||||
|
//!
|
||||||
|
//! These values don't follow the usual *binding power* convention for
|
||||||
|
//! a Pratt parser, where tighter binding corresponds to a larger number.\
|
||||||
|
//! Here, the smaller the number, the tighter the binding power.\
|
||||||
|
//! For this reason, we use the term *precedence rank* instead.
|
||||||
|
//!
|
||||||
|
//! ## Operators sorted by precedence (lowest number = tighter binding)
|
||||||
|
//!
|
||||||
|
//! ### Infix operators
|
||||||
|
//!
|
||||||
|
//! All infix operators in `UnrealScript` are
|
||||||
|
//! [left-associative](https://wiki.beyondunreal.com/Operators).
|
||||||
|
//!
|
||||||
|
//! 12: `**`
|
||||||
|
//! 16: `*`, `/`, `Cross`, `Dot`
|
||||||
|
//! 18: `%`
|
||||||
|
//! 20: `+`, `-`
|
||||||
|
//! 22: `<<`, `>>`, `>>>`
|
||||||
|
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
|
||||||
|
//! 26: `!=`
|
||||||
|
//! 28: `&`, `^`, `|`
|
||||||
|
//! 30: `&&`, `^^`
|
||||||
|
//! 32: `||`
|
||||||
|
//! 34: `*=`, `/=`, `+=`, `-=`
|
||||||
|
//! 40: `$`, `*`, `@`
|
||||||
|
//! 44: `$=`, `*=`, `@=`
|
||||||
|
//! 45: `-=`
|
||||||
|
//!
|
||||||
|
//! Some operators, such as `*`, appear twice with different precedence
|
||||||
|
//! ranks because they were defined with different values for different types
|
||||||
|
//! in separate script source files (as in the Killing Floor sources).\
|
||||||
|
//! However, `UnrealScript` uses only the first definition it encounters in
|
||||||
|
//! `Object.uc`, which corresponds to the lower value.
|
||||||
|
//!
|
||||||
|
//! ### Prefix operators
|
||||||
|
//!
|
||||||
|
//! `!`, `~`, `+`, `-`, `++`, `--`.
|
||||||
|
//!
|
||||||
|
//! ### Postfix operators
|
||||||
|
//!
|
||||||
|
//! `++`, `--`.
|
||||||
|
|
||||||
|
use crate::ast::{InfixOperator, infix_operator_info};
|
||||||
|
use crate::lexer::Token;
|
||||||
|
|
||||||
|
/// Compact precedence rank used by the Pratt Parser.
///
/// A smaller number means tighter binding, and a larger number means looser
/// binding. This inverted scale matches how `UnrealScript` tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by the maximum [`u8`] value.
    pub const LOOSEST: Self = Self(u8::MAX);

    /// The tightest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by zero.
    pub const TIGHTEST: Self = Self(0);

    /// Returns `true` if `self` has a looser binding than `other`.
    pub const fn is_looser_than(self, other: Self) -> bool {
        // On the inverted scale a larger numeric value is a looser binding.
        self.0 > other.0
    }
}
|
||||||
|
|
||||||
|
/// Maps a token to its infix operator along with its left and right binding
/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// Returns [`None`] if and only if `token` is not an infix operator.
pub fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    let info = infix_operator_info(token)?;
    // All operators are left-associative, so the returned right rank is the
    // table rank minus one (with our "smaller is tighter" scale, this
    // enforces left associativity in Pratt parsing).
    //
    // Since all precedences are even, subtracting one won't actually cross
    // any boundary between operator groups.
    //
    // NOTE(review): both ranks below are derived from the single field
    // `info.right_precedence_rank`, which is also used as the *left* rank —
    // presumably the table stores one precedence value per operator; confirm
    // the field name against `infix_operator_info`'s definition.
    Some((
        PrecedenceRank(info.right_precedence_rank),
        info.operator,
        PrecedenceRank(info.right_precedence_rank - 1),
    ))
}
|
||||||
463
rottlib/src/parser/grammar/expression/primary.rs
Normal file
463
rottlib/src/parser/grammar/expression/primary.rs
Normal file
@ -0,0 +1,463 @@
|
|||||||
|
//! Parser for primary expressions in Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! This module implements parsing of primary expressions via
|
||||||
|
//! [`Parser::parse_primary_from_current_token`] and its helper
|
||||||
|
//! [`Parser::parse_keyword_primary`].
|
||||||
|
//!
|
||||||
|
//! ## What is a "primary expression" here?
|
||||||
|
//!
|
||||||
|
//! In this module, "primary" is used somewhat more broadly than in a
|
||||||
|
//! textbook grammar, but it still has one essential property:
|
||||||
|
//!
|
||||||
|
//! A primary expression is an expression form that can be parsed
|
||||||
|
//! directly from the current token, without requiring an already
|
||||||
|
//! parsed left-hand side.
|
||||||
|
//!
|
||||||
|
//! This includes ordinary primaries such as literals, identifiers, and
|
||||||
|
//! parenthesized expressions, as well as keyword-led forms such as
|
||||||
|
//! `if`, `while`, `for`, `foreach`, `switch`, `return`, `break`,
|
||||||
|
//! `continue`, `new`, and `class<...>`.
|
||||||
|
//!
|
||||||
|
//! By contrast, selectors, postfix operators, and infix operators are
|
||||||
|
//! not primaries. They cannot stand on their own here: they are parsed
|
||||||
|
//! only as continuations of an already parsed expression.
|
||||||
|
//!
|
||||||
|
//! So "primary" here does not mean "smallest atomic expression".
|
||||||
|
//! It means "an expression form that does not need a left-hand side
|
||||||
|
//! in order to be parsed".
|
||||||
|
|
||||||
|
use super::selectors::ParsedCallArgumentSlot;
|
||||||
|
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses a primary expression starting from the provided token.
///
/// The provided token is assumed to be the already consumed first token of
/// the primary expression.
///
/// This includes literals, identifiers, grouped expressions, block
/// expressions, and certain keyword-led forms.
///
/// It does not parse selectors, postfix operators, or infix operators;
/// those are handled afterwards as continuations of the parsed primary.
///
/// # Errors
///
/// Returns [`ParseErrorKind::ExpressionExpected`] if the provided
/// token cannot begin any valid primary expression in this position.
pub(crate) fn parse_primary_from_current_token(
    &mut self,
    token: Token,
    token_lexeme: &'src str,
    token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    Ok(match token {
        Token::IntegerLiteral => {
            let value = self.decode_integer_literal(token_lexeme)?;
            self.arena
                .alloc_node_at(Expression::Integer(value), token_position)
        }
        Token::FloatLiteral => {
            let value = self.decode_float_literal(token_lexeme)?;
            self.arena
                .alloc_node_at(Expression::Float(value), token_position)
        }
        Token::StringLiteral => {
            let value = self.unescape_string_literal(token_lexeme);
            self.arena
                .alloc_node_at(Expression::String(value), token_position)
        }
        // A bare name literal has no tag; tagged names (`Texture'Foo'`)
        // start from an identifier and are handled in
        // `parse_identifier_like_primary`.
        Token::NameLiteral => self.arena.alloc_node_at(
            Expression::NameLiteral {
                tag: None,
                name: token_lexeme,
            },
            token_position,
        ),
        Token::LeftParenthesis => self.parse_parenthesized_expression_tail(token_position),
        Token::LeftBrace => self.parse_block_tail(token_position),
        // Keywords that fail to commit to a keyword-led form fall back to
        // identifier interpretation.
        Token::Keyword(keyword) => match self.parse_keyword_primary(keyword, token_position) {
            Some(keyword_expression) => keyword_expression,
            None => return self.parse_identifier_like_primary(token, token_position),
        },
        _ => return self.parse_identifier_like_primary(token, token_position),
    })
}
|
||||||
|
|
||||||
|
/// Parses a keyword-led primary expression.
///
/// Returns `None` if the keyword should instead be interpreted as an
/// identifier in this position.
fn parse_keyword_primary(
    &mut self,
    keyword: Keyword,
    token_position: TokenPosition,
) -> OptionalExpression<'src, 'arena> {
    Some(match keyword {
        Keyword::True => self
            .arena
            .alloc_node_at(Expression::Bool(true), token_position),
        Keyword::False => self
            .arena
            .alloc_node_at(Expression::Bool(false), token_position),
        Keyword::None => self.arena.alloc_node_at(Expression::None, token_position),
        Keyword::If => self.parse_if_tail(token_position),
        Keyword::While => self.parse_while_tail(token_position),
        Keyword::Do => self.parse_do_until_tail(token_position),
        Keyword::ForEach => self.parse_foreach_tail(token_position),
        Keyword::Return => self.parse_return_tail(token_position),
        Keyword::Break => self.parse_break_tail(token_position),
        Keyword::Continue => self
            .arena
            .alloc_node_at(Expression::Continue, token_position),
        Keyword::New => self.parse_new_expression_tail(token_position),
        // These keywords remain valid identifiers unless the following
        // tokens commit to the keyword-led form.
        Keyword::For if self.is_for_loop_header_ahead() => self.parse_for_tail(token_position),
        Keyword::Goto if !matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
            self.parse_goto_tail(token_position)
        }
        // `switch` is only treated as keyword-led when followed by `(`
        // to match the syntax accepted by the existing codebase.
        Keyword::Switch if matches!(self.peek_token(), Some(Token::LeftParenthesis)) => {
            self.parse_switch_tail(token_position)
        }
        // `class` commits to a class-type expression only when `<` follows.
        Keyword::Class => {
            if let Some((Token::Less, left_angle_bracket_position)) =
                self.peek_token_and_position()
            {
                self.advance(); // '<'
                self.parse_class_type_tail(token_position, left_angle_bracket_position)
            } else {
                return None;
            }
        }
        _ => return None,
    })
}
|
||||||
|
|
||||||
|
/// Attempts to parse the already-consumed token as an identifier or tagged
/// name literal.
///
/// # Errors
///
/// Returns [`ParseErrorKind::ExpressionExpected`] if the token
/// cannot be used as an identifier in this position.
fn parse_identifier_like_primary(
    &mut self,
    primary_token: Token,
    primary_token_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    let identifier_token =
        Parser::identifier_token_from_token(primary_token, primary_token_position).ok_or_else(
            || self.make_error_at(ParseErrorKind::ExpressionExpected, primary_token_position),
        )?;

    // A token that is valid as an identifier may still start a tagged-name
    // literal such as `Texture'Foo.Bar'`.
    let expression = if let Some((Token::NameLiteral, lexeme, name_position)) =
        self.peek_token_lexeme_and_position()
    {
        self.advance();
        self.arena.alloc_node_between(
            Expression::NameLiteral {
                tag: Some(identifier_token),
                name: lexeme,
            },
            primary_token_position,
            name_position,
        )
    } else {
        self.arena.alloc_node_at(
            Expression::Identifier(identifier_token),
            primary_token_position,
        )
    };
    Ok(expression)
}
|
||||||
|
|
||||||
|
/// Parses a parenthesized expression.
///
/// Assumes the opening `(` has already been consumed.
/// Reports and recovers from a missing closing `)`.
fn parse_parenthesized_expression_tail(
    &mut self,
    left_parenthesis_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // Special case for an empty expression: `()`.
    if let Some((Token::RightParenthesis, right_parenthesis_position)) =
        self.peek_token_and_position()
    {
        self.make_error_here(ParseErrorKind::ParenthesizedExpressionEmpty {
            left_parenthesis_position,
        })
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .blame_token(right_parenthesis_position)
        .report_error(self);
        return self.arena.alloc_node_between(
            Expression::Error,
            left_parenthesis_position,
            right_parenthesis_position,
        );
    }
    // Continue parsing normally.
    let inner_expression = if self.next_token_definitely_cannot_start_expression() {
        let error = self
            .make_error_here(ParseErrorKind::ExpressionExpected)
            .widen_error_span_from(left_parenthesis_position)
            .sync_error_at(self, SyncLevel::Expression)
            .related_token(left_parenthesis_position);
        let error_span = error.covered_span;
        self.report_error(error);
        // No viable inner expression: return an error node covering the
        // reported span without looking for the closing `)`.
        return crate::arena::ArenaNode::new_in(
            crate::ast::Expression::Error,
            error_span,
            self.arena,
        );
    } else {
        self.parse_expression()
    };
    let right_parenthesis_position = self
        .expect(
            Token::RightParenthesis,
            ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis {
                left_parenthesis_position,
            },
        )
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_at(self, SyncLevel::CloseParenthesis)
        .unwrap_or_fallback(self);
    self.arena.alloc_node_between(
        Expression::Parentheses(inner_expression),
        left_parenthesis_position,
        right_parenthesis_position,
    )
}
|
||||||
|
|
||||||
|
/// Parses a class type expression of the form `class<...>`.
///
/// Assumes the `class` keyword and following '<' token have already been
/// consumed. Reports and recovers from malformed type syntax locally.
fn parse_class_type_tail(
    &mut self,
    class_keyword_position: TokenPosition,
    left_angle_bracket_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // Special case for an empty argument: `class<>`.
    if let Some((Token::Greater, right_angle_bracket_position)) = self.peek_token_and_position()
    {
        self.make_error_here(ParseErrorKind::ClassTypeMissingTypeArgument {
            left_angle_bracket_position,
        })
        .widen_error_span_from(left_angle_bracket_position)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
        .blame_token(right_angle_bracket_position)
        .report_error(self);
        return self.arena.alloc_node_between(
            Expression::Error,
            class_keyword_position,
            right_angle_bracket_position,
        );
    }
    // Qualified identifiers do not have a meaningful fallback option,
    // so an invalid type argument yields an error node outright.
    let class_type = match self
        .parse_qualified_identifier(ParseErrorKind::ClassTypeInvalidTypeArgument {
            left_angle_bracket_position,
        })
        .widen_error_span_from(class_keyword_position)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
    {
        Ok(class_type) => class_type,
        Err(error) => {
            self.report_error(error);
            return self.arena.alloc_node_between(
                Expression::Error,
                class_keyword_position,
                self.last_consumed_position()
                    .unwrap_or(class_keyword_position),
            );
        }
    };
    let right_angle_bracket_position = self
        .expect(
            Token::Greater,
            ParseErrorKind::ClassTypeMissingClosingAngleBracket {
                left_angle_bracket_position,
            },
        )
        .widen_error_span_from(class_keyword_position)
        .sync_error_at(self, SyncLevel::CloseAngleBracket)
        .unwrap_or_fallback(self);
    self.arena.alloc_node_between(
        Expression::ClassType(class_type),
        class_keyword_position,
        right_angle_bracket_position,
    )
}
|
||||||
|
|
||||||
|
/// Parses a `new` expression with an optional parenthesized argument list.
///
/// Assumes the `new` keyword has already been consumed.
/// The parenthesized argument list is optional.
fn parse_new_expression_tail(
    &mut self,
    new_keyword_position: TokenPosition,
) -> ExpressionRef<'src, 'arena> {
    // Optional `(outer, name, flags)` argument list before the class
    // specifier.
    let (outer_argument, name_argument, flags_argument) =
        if let Some((Token::LeftParenthesis, left_parenthesis_position)) =
            self.peek_token_and_position()
        {
            self.advance();
            self.parse_new_argument_list_tail(left_parenthesis_position)
        } else {
            (None, None, None)
        };
    // The class specifier is often a literal class reference, but any
    // expression is accepted here.
    let class_specifier = if self.next_token_definitely_cannot_start_expression() {
        let error = self
            .make_error_here(ParseErrorKind::NewMissingClassSpecifier {
                new_keyword_position,
            })
            .widen_error_span_from(new_keyword_position)
            .sync_error_at(self, SyncLevel::Expression);
        let error_span = error.covered_span;
        self.report_error(error);
        crate::arena::ArenaNode::new_in(crate::ast::Expression::Error, error_span, self.arena)
    } else {
        self.parse_expression()
    };
    let class_specifier_end_position = class_specifier.span().token_to;
    self.arena.alloc_node_between(
        Expression::New {
            outer_argument,
            name_argument,
            flags_argument,
            class_specifier,
        },
        new_keyword_position,
        class_specifier_end_position,
    )
}
|
||||||
|
|
||||||
|
/// Parses the optional parenthesized arguments of a `new` expression.
///
/// Assumes the opening `(` has already been consumed.
/// Returns the `outer`, `name`, and `flags` argument slots, each of which
/// may be omitted. Reports and recovers from a missing closing `)`.
fn parse_new_argument_list_tail(
    &mut self,
    left_parenthesis_position: TokenPosition,
) -> (
    OptionalExpression<'src, 'arena>,
    OptionalExpression<'src, 'arena>,
    OptionalExpression<'src, 'arena>,
) {
    let mut outer_argument = None;
    let mut name_argument = None;
    let mut flags_argument = None;

    // `new` accepts at most three positional arguments, in this order.
    for slot in [&mut outer_argument, &mut name_argument, &mut flags_argument] {
        match self.parse_call_argument_slot(left_parenthesis_position) {
            ParsedCallArgumentSlot::Argument(argument) => *slot = argument,
            ParsedCallArgumentSlot::NoMoreArguments => break,
        }
    }

    // Anything left before `)` means the caller supplied too many arguments.
    if let Some((next_token, next_token_position)) = self.peek_token_and_position()
        && next_token != Token::RightParenthesis
    {
        self.make_error_here(ParseErrorKind::NewTooManyArguments {
            left_parenthesis_position,
        })
        .widen_error_span_from(left_parenthesis_position)
        .sync_error_until(self, SyncLevel::CloseParenthesis)
        .blame_token(next_token_position)
        .extend_blame_to_covered_end()
        .report_error(self);
    }

    self.expect(
        Token::RightParenthesis,
        ParseErrorKind::NewMissingClosingParenthesis {
            left_parenthesis_position,
        },
    )
    .widen_error_span_from(left_parenthesis_position)
    .sync_error_at(self, SyncLevel::CloseParenthesis)
    .report_error(self);

    (outer_argument, name_argument, flags_argument)
}
|
||||||
|
|
||||||
|
/// Returns `true` iff the next token is definitely not a valid start of an
|
||||||
|
/// expression.
|
||||||
|
///
|
||||||
|
/// This is intentionally conservative:
|
||||||
|
/// - `true` means parsing an expression here is pointless;
|
||||||
|
/// - `false` means "might be valid", so the normal expression parser should
|
||||||
|
/// decide and potentially emit a more specific error.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn next_token_definitely_cannot_start_expression(&mut self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self.peek_token(),
|
||||||
|
None
|
||||||
|
// Closing delimiters / separators
|
||||||
|
| Some(Token::RightParenthesis)
|
||||||
|
| Some(Token::RightBrace)
|
||||||
|
| Some(Token::RightBracket)
|
||||||
|
| Some(Token::Semicolon)
|
||||||
|
| Some(Token::Comma)
|
||||||
|
| Some(Token::Colon)
|
||||||
|
| Some(Token::Question)
|
||||||
|
|
||||||
|
// Tokens that only continue a previous expression
|
||||||
|
| Some(Token::Period)
|
||||||
|
|
||||||
|
// Infix / postfix / assignment operators
|
||||||
|
| Some(Token::Exponentiation)
|
||||||
|
| Some(Token::Multiply)
|
||||||
|
| Some(Token::Divide)
|
||||||
|
| Some(Token::Modulo)
|
||||||
|
| Some(Token::ConcatSpace)
|
||||||
|
| Some(Token::Concat)
|
||||||
|
| Some(Token::LeftShift)
|
||||||
|
| Some(Token::LogicalRightShift)
|
||||||
|
| Some(Token::RightShift)
|
||||||
|
| Some(Token::Less)
|
||||||
|
| Some(Token::LessEqual)
|
||||||
|
| Some(Token::Greater)
|
||||||
|
| Some(Token::GreaterEqual)
|
||||||
|
| Some(Token::Equal)
|
||||||
|
| Some(Token::NotEqual)
|
||||||
|
| Some(Token::ApproximatelyEqual)
|
||||||
|
| Some(Token::BitwiseAnd)
|
||||||
|
| Some(Token::BitwiseOr)
|
||||||
|
| Some(Token::BitwiseXor)
|
||||||
|
| Some(Token::LogicalAnd)
|
||||||
|
| Some(Token::LogicalXor)
|
||||||
|
| Some(Token::LogicalOr)
|
||||||
|
| Some(Token::Assign)
|
||||||
|
| Some(Token::MultiplyAssign)
|
||||||
|
| Some(Token::DivideAssign)
|
||||||
|
| Some(Token::ModuloAssign)
|
||||||
|
| Some(Token::PlusAssign)
|
||||||
|
| Some(Token::MinusAssign)
|
||||||
|
| Some(Token::ConcatAssign)
|
||||||
|
| Some(Token::ConcatSpaceAssign)
|
||||||
|
|
||||||
|
// Non-expression trivia / technical tokens
|
||||||
|
| Some(Token::ExecDirective)
|
||||||
|
| Some(Token::CppBlock)
|
||||||
|
| Some(Token::Hash)
|
||||||
|
| Some(Token::LineComment)
|
||||||
|
| Some(Token::BlockComment)
|
||||||
|
| Some(Token::Newline)
|
||||||
|
| Some(Token::Whitespace)
|
||||||
|
| Some(Token::Error)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
197
rottlib/src/parser/grammar/expression/selectors.rs
Normal file
197
rottlib/src/parser/grammar/expression/selectors.rs
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
//! Parser for expression selectors in Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! Selectors are suffix forms that extend an already parsed expression,
|
||||||
|
//! such as member access, indexing, and calls.
|
||||||
|
//!
|
||||||
|
//! Unlike primaries, selectors cannot be parsed on their own from the
|
||||||
|
//! current token. They always require a left-hand side expression.
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::AstSpan;
|
||||||
|
use crate::ast::{Expression, ExpressionRef, OptionalExpression};
|
||||||
|
use crate::lexer::{Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
/// Represents the result of parsing one call argument slot.
///
/// This distinguishes between the end of the argument list and a parsed
/// argument slot, including an omitted one (`f(a, , b)`-style).
#[must_use]
#[derive(Debug, PartialEq)]
pub enum ParsedCallArgumentSlot<'src, 'arena> {
    /// Indicates that the argument list has ended.
    NoMoreArguments,
    /// The parsed argument for this slot.
    ///
    /// `None` represents an omitted argument between commas.
    Argument(OptionalExpression<'src, 'arena>),
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses zero or more postfix selectors after `left_hand_side`.
|
||||||
|
///
|
||||||
|
/// Returns the resulting expression after all contiguous selectors.
|
||||||
|
pub(crate) fn parse_selectors_into(
|
||||||
|
&mut self,
|
||||||
|
left_hand_side: ExpressionRef<'src, 'arena>,
|
||||||
|
) -> ParseExpressionResult<'src, 'arena> {
|
||||||
|
let mut left_hand_side = left_hand_side;
|
||||||
|
// `next_position` is used only to widen diagnostic spans.
|
||||||
|
while let Some((next_token, next_position)) = self.peek_token_and_position() {
|
||||||
|
left_hand_side = match next_token {
|
||||||
|
Token::Period => self.parse_selector_member_access_into(left_hand_side)?,
|
||||||
|
Token::LeftBracket => {
|
||||||
|
self.parse_selector_index_into(left_hand_side, next_position)?
|
||||||
|
}
|
||||||
|
Token::LeftParenthesis => {
|
||||||
|
self.parse_selector_call_into(left_hand_side, next_position)
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
Ok(left_hand_side)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a member access selector after `left_hand_side`.
///
/// Expects the leading `.` to be the next token and returns the resulting
/// member access expression.
fn parse_selector_member_access_into(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
) -> ParseExpressionResult<'src, 'arena> {
    self.advance(); // `.`
    let member_access_start = left_hand_side.span().token_from;
    let member_identifier = self.parse_identifier(ParseErrorKind::ExpressionUnexpectedToken)?;
    // NOTE(review): `.0` is presumably the identifier's token position —
    // confirm against `parse_identifier`'s return type.
    let member_access_end = member_identifier.0;
    Ok(self.arena.alloc_node(
        Expression::Member {
            target: left_hand_side,
            name: member_identifier,
        },
        AstSpan::range(member_access_start, member_access_end),
    ))
}
|
||||||
|
|
||||||
|
/// Parses an index selector after `left_hand_side`.
///
/// Expects the leading `[` to be the next token and returns the resulting
/// indexing expression.
///
/// # Errors
///
/// Propagates the error when the closing `]` is missing and recovery fails.
fn parse_selector_index_into(
    &mut self,
    left_hand_side: ExpressionRef<'src, 'arena>,
    left_bracket_position: TokenPosition,
) -> ParseExpressionResult<'src, 'arena> {
    self.advance(); // '['
    let index_expression = self.parse_expression();
    let right_bracket_position = self
        .expect(
            Token::RightBracket,
            ParseErrorKind::ExpressionUnexpectedToken,
        )
        .widen_error_span_from(left_bracket_position)
        .sync_error_at(self, SyncLevel::CloseBracket)?;

    // The resulting node spans from the start of the indexed expression
    // to the closing bracket.
    let expression_start = left_hand_side.span().token_from;
    Ok(self.arena.alloc_node_between(
        Expression::Index {
            target: left_hand_side,
            index: index_expression,
        },
        expression_start,
        right_bracket_position,
    ))
}
|
||||||
|
|
||||||
|
/// Parses a call selector after `left_hand_side`.
|
||||||
|
///
|
||||||
|
/// Expects the leading `(` to be the next token and returns the resulting
|
||||||
|
/// call expression.
|
||||||
|
fn parse_selector_call_into(
|
||||||
|
&mut self,
|
||||||
|
left_hand_side: ExpressionRef<'src, 'arena>,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
self.advance(); // '('
|
||||||
|
let argument_list = self.parse_call_argument_list(left_parenthesis_position);
|
||||||
|
let right_parenthesis_position = self
|
||||||
|
.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::FunctionCallMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.widen_error_span_from(left_parenthesis_position)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
|
||||||
|
let expression_start = left_hand_side.span().token_from;
|
||||||
|
self.arena.alloc_node_between(
|
||||||
|
Expression::Call {
|
||||||
|
callee: left_hand_side,
|
||||||
|
arguments: argument_list,
|
||||||
|
},
|
||||||
|
expression_start,
|
||||||
|
right_parenthesis_position,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses one call argument slot after an already consumed `(`.
|
||||||
|
///
|
||||||
|
/// In `UnrealScript`, every comma introduces a follow-up argument slot, so a
|
||||||
|
/// trailing comma immediately before `)` denotes an omitted final argument.
|
||||||
|
///
|
||||||
|
/// Returns [`ParsedCallArgumentSlot::NoMoreArguments`] when the argument
|
||||||
|
/// list ends, and `Argument(None)` for an omitted argument slot.
|
||||||
|
pub(crate) fn parse_call_argument_slot(
|
||||||
|
&mut self,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> ParsedCallArgumentSlot<'src, 'arena> {
|
||||||
|
match self.peek_token() {
|
||||||
|
Some(Token::RightParenthesis) => return ParsedCallArgumentSlot::NoMoreArguments,
|
||||||
|
Some(Token::Comma) => {
|
||||||
|
self.advance();
|
||||||
|
if self.at_call_argument_boundary() {
|
||||||
|
return ParsedCallArgumentSlot::Argument(None);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
let argument = self.parse_expression();
|
||||||
|
if !self.at_call_argument_boundary() {
|
||||||
|
self.make_error_here(ParseErrorKind::FunctionArgumentMissingComma)
|
||||||
|
.widen_error_span_from(left_parenthesis_position)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
ParsedCallArgumentSlot::Argument(Some(argument))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a call argument list after an already-consumed `(`.
|
||||||
|
///
|
||||||
|
/// Returns all parsed argument slots, preserving omitted arguments
|
||||||
|
/// as `None`.
|
||||||
|
fn parse_call_argument_list(
|
||||||
|
&mut self,
|
||||||
|
left_parenthesis_position: TokenPosition,
|
||||||
|
) -> ArenaVec<'arena, Option<ExpressionRef<'src, 'arena>>> {
|
||||||
|
let mut argument_list = ArenaVec::new_in(self.arena);
|
||||||
|
|
||||||
|
while let ParsedCallArgumentSlot::Argument(argument) =
|
||||||
|
self.parse_call_argument_slot(left_parenthesis_position)
|
||||||
|
{
|
||||||
|
argument_list.push(argument);
|
||||||
|
}
|
||||||
|
|
||||||
|
argument_list
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the current lookahead token ends the current call
|
||||||
|
/// argument slot.
|
||||||
|
///
|
||||||
|
/// This is true for `,`, which starts the next slot, and for `)`, which
|
||||||
|
/// ends the argument list.
|
||||||
|
fn at_call_argument_boundary(&mut self) -> bool {
|
||||||
|
matches!(
|
||||||
|
self.peek_token(),
|
||||||
|
Some(Token::Comma | Token::RightParenthesis)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
203
rottlib/src/parser/grammar/expression/switch.rs
Normal file
203
rottlib/src/parser/grammar/expression/switch.rs
Normal file
@ -0,0 +1,203 @@
|
|||||||
|
//! Switch parsing for Fermented `UnrealScript`.
|
||||||
|
//!
|
||||||
|
//! Provides routines for parsing `switch (...) { ... }` expressions.
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
/// Parses a `switch` expression after the `switch` keyword has been
|
||||||
|
/// consumed.
|
||||||
|
///
|
||||||
|
/// Returns an [`crate::ast::Expression::Switch`] whose span covers the
|
||||||
|
/// entire construct, from `switch_start_position` to the closing `}`.
|
||||||
|
///
|
||||||
|
/// Only one `default` arm is recorded. Duplicate defaults and `case` arms
|
||||||
|
/// after a `default` are reported as errors.
|
||||||
|
///
|
||||||
|
/// On premature end-of-file, reports an error and returns a best-effort
|
||||||
|
/// switch node.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_switch_tail(
|
||||||
|
&mut self,
|
||||||
|
switch_start_position: TokenPosition,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
let selector = self.parse_expression();
|
||||||
|
let mut cases = self.arena.vec();
|
||||||
|
let mut default_arm = None;
|
||||||
|
let mut span = AstSpan::new(switch_start_position);
|
||||||
|
if self
|
||||||
|
.expect(Token::LeftBrace, ParseErrorKind::SwitchMissingBody)
|
||||||
|
.report_error(self)
|
||||||
|
{
|
||||||
|
return self.alloc_switch_node(selector, cases, default_arm, span);
|
||||||
|
}
|
||||||
|
while let Some((token, token_position)) = self.peek_token_and_position() {
|
||||||
|
match token {
|
||||||
|
Token::RightBrace => {
|
||||||
|
self.advance(); // '}'
|
||||||
|
span.extend_to(token_position);
|
||||||
|
return self.alloc_switch_node(selector, cases, default_arm, span);
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Case) => {
|
||||||
|
if default_arm.is_some() {
|
||||||
|
self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
|
||||||
|
}
|
||||||
|
let case_node = self.parse_switch_case_group(token_position);
|
||||||
|
cases.push(case_node);
|
||||||
|
}
|
||||||
|
Token::Keyword(Keyword::Default) => {
|
||||||
|
if default_arm.is_some() {
|
||||||
|
self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
|
||||||
|
}
|
||||||
|
// Duplicate `default` is still parsed so that diagnostics
|
||||||
|
// in its body can be reported.
|
||||||
|
self.parse_switch_default_arm(
|
||||||
|
token_position,
|
||||||
|
default_arm.get_or_insert_with(|| self.arena.vec()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
// Items before the first arm declaration are not allowed, but
|
||||||
|
// are parsed for basic diagnostics and simplicity.
|
||||||
|
_ => self.parse_switch_preamble_items(token_position),
|
||||||
|
}
|
||||||
|
self.ensure_forward_progress(token_position);
|
||||||
|
}
|
||||||
|
self.report_error_here(ParseErrorKind::SwitchMissingClosingBrace);
|
||||||
|
// This can only be `None` in the pathological case of
|
||||||
|
// an empty token stream
|
||||||
|
span.extend_to(
|
||||||
|
self.last_consumed_position()
|
||||||
|
.unwrap_or(switch_start_position),
|
||||||
|
);
|
||||||
|
self.alloc_switch_node(selector, cases, default_arm, span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a stacked `case` group and its body:
|
||||||
|
/// `case <expr>: (case <expr>:)* <arm-body>`.
|
||||||
|
///
|
||||||
|
/// Returns the allocated [`crate::ast::CaseRef`] node.
|
||||||
|
///
|
||||||
|
/// The returned node span covers the entire group, from
|
||||||
|
/// `first_case_position` to the end of the arm body, or to the end of the
|
||||||
|
/// last label if the body is empty.
|
||||||
|
#[must_use]
|
||||||
|
fn parse_switch_case_group(
|
||||||
|
&mut self,
|
||||||
|
first_case_position: TokenPosition,
|
||||||
|
) -> crate::ast::SwitchCaseRef<'src, 'arena> {
|
||||||
|
let mut labels = self.arena.vec();
|
||||||
|
while let Some((Keyword::Case, case_position)) = self.peek_keyword_and_position() {
|
||||||
|
self.advance(); // 'case'
|
||||||
|
labels.push(self.parse_expression());
|
||||||
|
|
||||||
|
// `:` is required after each case label; missing `:` is recovered
|
||||||
|
// at statement sync level.
|
||||||
|
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
||||||
|
.widen_error_span_from(case_position)
|
||||||
|
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
let mut body = self.arena.vec();
|
||||||
|
self.parse_switch_arm_body(&mut body);
|
||||||
|
let case_span = compute_case_span(first_case_position, &labels, &body);
|
||||||
|
self.arena
|
||||||
|
.alloc_node(crate::ast::SwitchCase { labels, body }, case_span)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `default:` arm and appends its statements to `statements`.
|
||||||
|
fn parse_switch_default_arm(
|
||||||
|
&mut self,
|
||||||
|
default_position: TokenPosition,
|
||||||
|
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
||||||
|
) {
|
||||||
|
self.advance(); // 'default'
|
||||||
|
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
||||||
|
.widen_error_span_from(default_position)
|
||||||
|
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
||||||
|
.report_error(self);
|
||||||
|
self.parse_switch_arm_body(statements);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses statements of a single switch arm body.
|
||||||
|
fn parse_switch_arm_body(
|
||||||
|
&mut self,
|
||||||
|
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
||||||
|
) {
|
||||||
|
while let Some((token, token_position)) = self.peek_token_and_position() {
|
||||||
|
match token {
|
||||||
|
Token::Keyword(Keyword::Case | Keyword::Default) | Token::RightBrace => break,
|
||||||
|
_ => {
|
||||||
|
self.parse_next_block_item_into(statements);
|
||||||
|
self.ensure_forward_progress(token_position);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses items that appear before any `case` or `default` arm declaration.
|
||||||
|
///
|
||||||
|
/// Such items are not allowed, but they are parsed to produce diagnostics
|
||||||
|
/// and maintain forward progress.
|
||||||
|
///
|
||||||
|
/// Parsed statements are discarded; only error reporting is preserved.
|
||||||
|
///
|
||||||
|
/// Parsing stops at a boundary token or end-of-file.
|
||||||
|
/// Boundary tokens: `case`, `default`, `}`.
|
||||||
|
fn parse_switch_preamble_items(&mut self, preamble_start_position: TokenPosition)
|
||||||
|
where
|
||||||
|
'src: 'arena,
|
||||||
|
{
|
||||||
|
// Discard parsed statements into a sink vector.
|
||||||
|
// This is a bit "hacky", but I don't want to adapt code to skip
|
||||||
|
// production of AST nodes just to report errors in
|
||||||
|
// one problematic case.
|
||||||
|
let mut sink = self.arena.vec();
|
||||||
|
self.parse_switch_arm_body(&mut sink);
|
||||||
|
self.make_error_here(ParseErrorKind::SwitchTopLevelItemNotCase)
|
||||||
|
.widen_error_span_from(preamble_start_position)
|
||||||
|
.report_error(self);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper to allocate a `Switch` expression with the given span.
|
||||||
|
#[must_use]
|
||||||
|
fn alloc_switch_node(
|
||||||
|
&self,
|
||||||
|
selector: ExpressionRef<'src, 'arena>,
|
||||||
|
cases: ArenaVec<'arena, crate::ast::SwitchCaseRef<'src, 'arena>>,
|
||||||
|
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
|
||||||
|
span: AstSpan,
|
||||||
|
) -> ExpressionRef<'src, 'arena> {
|
||||||
|
self.arena.alloc_node(
|
||||||
|
crate::ast::Expression::Switch {
|
||||||
|
selector,
|
||||||
|
cases,
|
||||||
|
default_arm,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Computes an [`AstSpan`] covering a `case` group.
|
||||||
|
///
|
||||||
|
/// The span begins at `labels_start_position` and extends to:
|
||||||
|
/// - the end of the last statement in `body`, if present; otherwise
|
||||||
|
/// - the end of the last label in `labels`, if present.
|
||||||
|
///
|
||||||
|
/// If both are empty, the span covers only `labels_start_position`.
|
||||||
|
#[must_use]
|
||||||
|
fn compute_case_span(
|
||||||
|
labels_start_position: TokenPosition,
|
||||||
|
labels: &[ExpressionRef],
|
||||||
|
body: &[StatementRef],
|
||||||
|
) -> AstSpan {
|
||||||
|
let mut span = AstSpan::new(labels_start_position);
|
||||||
|
if let Some(last_statement) = body.last() {
|
||||||
|
span.extend_to(last_statement.span().token_to);
|
||||||
|
} else if let Some(last_label) = labels.last() {
|
||||||
|
span.extend_to(last_label.span().token_to);
|
||||||
|
}
|
||||||
|
span
|
||||||
|
}
|
||||||
@ -1,99 +0,0 @@
|
|||||||
use crate::ast::{AstSpan, Expression};
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
|
||||||
use crate::parser::ParseErrorKind;
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|
||||||
/// Parse the continuation of a `return` after its keyword was consumed.
|
|
||||||
///
|
|
||||||
/// Doesn't consume the terminating `;`.
|
|
||||||
/// If the next token is not `;`, parses an expression as the optional
|
|
||||||
/// value. Produces an [`Expression::Return`] whose span runs from
|
|
||||||
/// the `return` keyword to the end of the value if present, otherwise to
|
|
||||||
/// the `return` keyword.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_return_cont(
|
|
||||||
&mut self,
|
|
||||||
return_start_location: TokenLocation,
|
|
||||||
) -> crate::ast::ExpressionRef<'src, 'arena> {
|
|
||||||
let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
|
|
||||||
let value = self.parse_expression();
|
|
||||||
|
|
||||||
let span = AstSpan {
|
|
||||||
from: return_start_location,
|
|
||||||
to: value.span().to,
|
|
||||||
};
|
|
||||||
(Some(value), span)
|
|
||||||
} else {
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
AstSpan {
|
|
||||||
from: return_start_location,
|
|
||||||
to: return_start_location,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
};
|
|
||||||
self.arena.alloc(Expression::Return(value), span)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse the continuation of a `break` after its keyword was consumed.
|
|
||||||
///
|
|
||||||
/// Doesn't consume the terminating `;`.
|
|
||||||
/// If the next token is not `;`, parses an optional value expression.
|
|
||||||
/// Produces an [`Expression::Break`] spanning from `break` to the end
|
|
||||||
/// of the value if present, otherwise to the `break` keyword.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_break_cont(
|
|
||||||
&mut self,
|
|
||||||
break_start_location: TokenLocation,
|
|
||||||
) -> crate::ast::ExpressionRef<'src, 'arena> {
|
|
||||||
let (value, span) = if self.peek_token() != Some(Token::Semicolon) {
|
|
||||||
let value = self.parse_expression();
|
|
||||||
|
|
||||||
let span = AstSpan {
|
|
||||||
from: break_start_location,
|
|
||||||
to: value.span().to,
|
|
||||||
};
|
|
||||||
(Some(value), span)
|
|
||||||
} else {
|
|
||||||
(
|
|
||||||
None,
|
|
||||||
AstSpan {
|
|
||||||
from: break_start_location,
|
|
||||||
to: break_start_location,
|
|
||||||
},
|
|
||||||
)
|
|
||||||
};
|
|
||||||
self.arena.alloc(Expression::Break(value), span)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a `goto` expression after `goto`, assuming that the `goto` token
|
|
||||||
/// was consumed.
|
|
||||||
///
|
|
||||||
/// Requires the next token to be an identifier label.
|
|
||||||
/// On missing token, returns [`ParseErrorKind::UnexpectedEndOfFile`].
|
|
||||||
/// On a non-identifier next token,
|
|
||||||
/// returns [`ParseErrorKind::GotoMissingLabel`].
|
|
||||||
/// On success, produces an [`Expression::Goto`] spanning from `goto`
|
|
||||||
/// to the label token.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_goto_cont(
|
|
||||||
&mut self,
|
|
||||||
goto_start_location: TokenLocation,
|
|
||||||
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
|
|
||||||
let Some((token, text, token_location)) = self.peek_token_lexeme_and_location() else {
|
|
||||||
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
|
||||||
};
|
|
||||||
if token == Token::Identifier {
|
|
||||||
let span = AstSpan {
|
|
||||||
from: goto_start_location,
|
|
||||||
to: token_location,
|
|
||||||
};
|
|
||||||
self.advance();
|
|
||||||
Ok(self
|
|
||||||
.arena
|
|
||||||
.alloc(Expression::Goto(self.arena.string(text)), span))
|
|
||||||
} else {
|
|
||||||
Err(self.make_error_here(ParseErrorKind::GotoMissingLabel))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
297
rottlib/src/parser/grammar/function/definition.rs
Normal file
297
rottlib/src/parser/grammar/function/definition.rs
Normal file
@ -0,0 +1,297 @@
|
|||||||
|
//! Parsing of callable definitions for Fermented `UnrealScript`
|
||||||
|
//! (functions, events, delegates, operators).
|
||||||
|
|
||||||
|
use crate::arena::ArenaVec;
|
||||||
|
|
||||||
|
use crate::ast::{
|
||||||
|
AstSpan, CallableDefinition, CallableDefinitionRef, CallableKind, CallableModifier,
|
||||||
|
CallableModifierKind, CallableName, IdentifierToken, InfixOperator, InfixOperatorName,
|
||||||
|
ParameterRef, PostfixOperator, PostfixOperatorName, PrefixOperator, PrefixOperatorName,
|
||||||
|
TypeSpecifierRef,
|
||||||
|
};
|
||||||
|
use crate::lexer::{Keyword, Token, TokenPosition};
|
||||||
|
use crate::parser::{
|
||||||
|
ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel,
|
||||||
|
recovery::RecoveryFallback,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Temporary parsed representation of a callable header without its body.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub(super) struct ParsedCallableHeader<'src, 'arena> {
|
||||||
|
pub start_position: TokenPosition,
|
||||||
|
pub modifiers: crate::arena::ArenaVec<'arena, CallableModifier>,
|
||||||
|
pub kind: CallableKind,
|
||||||
|
pub return_type_specifier: Option<TypeSpecifierRef<'src, 'arena>>,
|
||||||
|
pub name: CallableName,
|
||||||
|
pub parameters: crate::arena::ArenaVec<'arena, ParameterRef<'src, 'arena>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for ParsedCallableHeader<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
|
let fallback_position = error.covered_span.token_from;
|
||||||
|
ParsedCallableHeader {
|
||||||
|
start_position: fallback_position,
|
||||||
|
modifiers: parser.arena.vec(),
|
||||||
|
kind: CallableKind::Function,
|
||||||
|
return_type_specifier: None,
|
||||||
|
name: CallableName::Identifier(IdentifierToken(fallback_position)),
|
||||||
|
parameters: parser.arena.vec(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
|
/// Parses a callable definition.
|
||||||
|
///
|
||||||
|
/// Assumes [`Parser::is_callable_header_ahead`] has already confirmed that
|
||||||
|
/// a callable declaration begins at the current position. This affects
|
||||||
|
/// the diagnostics produced for malformed input.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_callable_definition(&mut self) -> CallableDefinitionRef<'src, 'arena> {
|
||||||
|
let header = self.parse_callable_header().unwrap_or_fallback(self);
|
||||||
|
|
||||||
|
let body = if self.eat(Token::LeftBrace) {
|
||||||
|
Some(self.parse_braced_block_statements_tail(self.last_consumed_position_or_start()))
|
||||||
|
} else {
|
||||||
|
self.expect(
|
||||||
|
Token::Semicolon,
|
||||||
|
ParseErrorKind::CallableMissingBodyOrSemicolon,
|
||||||
|
)
|
||||||
|
.report_error(self);
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let span = AstSpan::range(
|
||||||
|
header.start_position,
|
||||||
|
self.last_consumed_position_or_start(),
|
||||||
|
);
|
||||||
|
|
||||||
|
self.arena.alloc_node(
|
||||||
|
CallableDefinition {
|
||||||
|
name: header.name,
|
||||||
|
kind: header.kind,
|
||||||
|
return_type_specifier: header.return_type_specifier,
|
||||||
|
modifiers: header.modifiers,
|
||||||
|
parameters: header.parameters,
|
||||||
|
body,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a callable header without the body.
|
||||||
|
fn parse_callable_header(
|
||||||
|
&mut self,
|
||||||
|
) -> ParseResult<'src, 'arena, ParsedCallableHeader<'src, 'arena>> {
|
||||||
|
let start_position = self.require_position(ParseErrorKind::CallableExpectedHeader)?;
|
||||||
|
let mut modifiers = self.arena.vec();
|
||||||
|
self.collect_callable_modifiers(&mut modifiers);
|
||||||
|
let kind = self.parse_callable_kind()?;
|
||||||
|
self.collect_callable_modifiers(&mut modifiers);
|
||||||
|
|
||||||
|
// `(` cannot appear inside a return type in this grammar,
|
||||||
|
// so seeing it here means the callable has no return type specifier.
|
||||||
|
let return_type_specifier = match self.peek_token_at(1) {
|
||||||
|
Some(Token::LeftParenthesis) => None,
|
||||||
|
_ => Some(self.parse_type_specifier()?),
|
||||||
|
};
|
||||||
|
let name = self.parse_callable_name(kind)?;
|
||||||
|
|
||||||
|
self.expect(
|
||||||
|
Token::LeftParenthesis,
|
||||||
|
ParseErrorKind::CallableParamsMissingOpeningParenthesis,
|
||||||
|
)
|
||||||
|
.report_error(self);
|
||||||
|
let parameters = self.parse_parameter_list();
|
||||||
|
self.expect(
|
||||||
|
Token::RightParenthesis,
|
||||||
|
ParseErrorKind::CallableParamsMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
|
||||||
|
Ok(ParsedCallableHeader {
|
||||||
|
start_position,
|
||||||
|
modifiers,
|
||||||
|
kind,
|
||||||
|
return_type_specifier,
|
||||||
|
name,
|
||||||
|
parameters,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_callable_kind(&mut self) -> ParseResult<'src, 'arena, CallableKind> {
|
||||||
|
if let Some(keyword) = self.peek_keyword() {
|
||||||
|
// Handle this separately because only infix operators can carry
|
||||||
|
// an optional precedence and cannot, therefore, be handled by
|
||||||
|
// a simple converter.
|
||||||
|
if keyword == Keyword::Operator {
|
||||||
|
self.advance();
|
||||||
|
let precedence = self.parse_optional_parenthesized_integer(
|
||||||
|
ParseErrorKind::CallableOperatorInvalidPrecedence,
|
||||||
|
);
|
||||||
|
return Ok(CallableKind::InfixOperator(precedence));
|
||||||
|
}
|
||||||
|
if let Ok(kind) = CallableKind::try_from(keyword) {
|
||||||
|
self.advance();
|
||||||
|
return Ok(kind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(self.make_error_here(ParseErrorKind::CallableExpectedKind))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_callable_name(
|
||||||
|
&mut self,
|
||||||
|
kind: CallableKind,
|
||||||
|
) -> ParseResult<'src, 'arena, CallableName> {
|
||||||
|
match kind {
|
||||||
|
CallableKind::Function | CallableKind::Event | CallableKind::Delegate => self
|
||||||
|
.parse_identifier(ParseErrorKind::CallableNameNotIdentifier)
|
||||||
|
.map(CallableName::Identifier),
|
||||||
|
CallableKind::PrefixOperator => {
|
||||||
|
let (token, operator_position) = self.require_token_and_position(
|
||||||
|
ParseErrorKind::CallablePrefixOperatorInvalidSymbol,
|
||||||
|
)?;
|
||||||
|
let operator = PrefixOperator::try_from(token).map_err(|()| {
|
||||||
|
self.make_error_here(ParseErrorKind::CallablePrefixOperatorInvalidSymbol)
|
||||||
|
})?;
|
||||||
|
self.advance();
|
||||||
|
Ok(CallableName::PrefixOperator(PrefixOperatorName {
|
||||||
|
kind: operator,
|
||||||
|
position: operator_position,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
CallableKind::InfixOperator(_) => {
|
||||||
|
let (token, operator_position) = self.require_token_and_position(
|
||||||
|
ParseErrorKind::CallableInfixOperatorInvalidSymbol,
|
||||||
|
)?;
|
||||||
|
let operator = InfixOperator::try_from(token).map_err(|()| {
|
||||||
|
self.make_error_here(ParseErrorKind::CallableInfixOperatorInvalidSymbol)
|
||||||
|
})?;
|
||||||
|
self.advance();
|
||||||
|
Ok(CallableName::InfixOperator(InfixOperatorName {
|
||||||
|
kind: operator,
|
||||||
|
position: operator_position,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
CallableKind::PostfixOperator => {
|
||||||
|
let (token, operator_position) = self.require_token_and_position(
|
||||||
|
ParseErrorKind::CallablePostfixOperatorInvalidSymbol,
|
||||||
|
)?;
|
||||||
|
let operator = PostfixOperator::try_from(token).map_err(|()| {
|
||||||
|
self.make_error_here(ParseErrorKind::CallablePostfixOperatorInvalidSymbol)
|
||||||
|
})?;
|
||||||
|
self.advance();
|
||||||
|
Ok(CallableName::PostfixOperator(PostfixOperatorName {
|
||||||
|
kind: operator,
|
||||||
|
position: operator_position,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an uninterrupted sequence of function modifiers into
|
||||||
|
/// given vector.
|
||||||
|
pub(crate) fn collect_callable_modifiers(
|
||||||
|
&mut self,
|
||||||
|
modifiers: &mut ArenaVec<'arena, CallableModifier>,
|
||||||
|
) {
|
||||||
|
while let Some(next_mod) = self.parse_function_modifier() {
|
||||||
|
modifiers.push(next_mod);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_function_modifier(&mut self) -> Option<CallableModifier> {
|
||||||
|
let (keyword, start) = self.peek_keyword_and_position()?;
|
||||||
|
|
||||||
|
let kind = match keyword {
|
||||||
|
Keyword::Native => {
|
||||||
|
self.advance();
|
||||||
|
let native_id = self.parse_optional_parenthesized_integer(
|
||||||
|
ParseErrorKind::NativeModifierIdNotIntegerLiteral,
|
||||||
|
);
|
||||||
|
CallableModifierKind::Native(native_id)
|
||||||
|
}
|
||||||
|
Keyword::Config => {
|
||||||
|
self.advance();
|
||||||
|
let ident = self
|
||||||
|
.parse_required_parenthesized_identifier(
|
||||||
|
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
|
||||||
|
ParseErrorKind::ParenthesisedIdentifierMissingClosingParenthesis,
|
||||||
|
)
|
||||||
|
.unwrap_or(IdentifierToken(start));
|
||||||
|
CallableModifierKind::Config(ident)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let simple = CallableModifierKind::try_from(keyword).ok()?;
|
||||||
|
// Only advance after confirming it is the modifier
|
||||||
|
self.advance();
|
||||||
|
simple
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let span = AstSpan::range(start, self.last_consumed_position_or_start());
|
||||||
|
Some(CallableModifier { kind, span })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_optional_parenthesized_integer(&mut self, close_err: ParseErrorKind) -> Option<u128> {
|
||||||
|
if !self.eat(Token::LeftParenthesis) {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = match self.peek_token_and_lexeme() {
|
||||||
|
Some((Token::IntegerLiteral, lex)) => {
|
||||||
|
self.advance();
|
||||||
|
self.decode_integer_literal(lex).ok_or_report(self)
|
||||||
|
}
|
||||||
|
Some(_) => {
|
||||||
|
self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
|
||||||
|
self.advance();
|
||||||
|
None
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.report_error_here(ParseErrorKind::OperatorPrecedenceNotIntegerLiteral);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.expect(Token::RightParenthesis, close_err)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
|
||||||
|
value
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_required_parenthesized_identifier(
|
||||||
|
&mut self,
|
||||||
|
close_err: ParseErrorKind,
|
||||||
|
ident_err: ParseErrorKind,
|
||||||
|
) -> Option<IdentifierToken> {
|
||||||
|
if !self.eat(Token::LeftParenthesis) {
|
||||||
|
self.report_error_here(ident_err);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ident = match self.peek_token_lexeme_and_position() {
|
||||||
|
Some((tok, _, pos)) if tok.is_valid_identifier_name() => {
|
||||||
|
self.advance();
|
||||||
|
Some(IdentifierToken(pos))
|
||||||
|
}
|
||||||
|
Some(_) => {
|
||||||
|
self.report_error_here(ident_err);
|
||||||
|
self.advance();
|
||||||
|
None
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.report_error_here(ident_err);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
self.expect(Token::RightParenthesis, close_err)
|
||||||
|
.sync_error_at(self, SyncLevel::CloseParenthesis)
|
||||||
|
.report_error(self);
|
||||||
|
|
||||||
|
ident
|
||||||
|
}
|
||||||
|
}
|
||||||
50
rottlib/src/parser/grammar/function/lookahead.rs
Normal file
50
rottlib/src/parser/grammar/function/lookahead.rs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
//! Lookahead for callable headers in Fermented `UnrealScript`.
|
||||||
|
|
||||||
|
use crate::lexer::{Keyword, Token};
|
||||||
|
use crate::parser::Parser;
|
||||||
|
|
||||||
|
impl Parser<'_, '_> {
|
||||||
|
/// Returns whether the upcoming tokens have the syntactic shape of
|
||||||
|
/// a callable header.
|
||||||
|
///
|
||||||
|
/// Returns `true` when the following tokens consist of zero or more
|
||||||
|
/// callable modifiers followed by a keyword that defines a callable kind.
|
||||||
|
///
|
||||||
|
/// Does not check whether any parenthesized arguments are valid.
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn is_callable_header_ahead(&mut self) -> bool {
|
||||||
|
let mut lookahead_offset = 0;
|
||||||
|
while let Some(keyword) = self.peek_keyword_at(lookahead_offset) {
|
||||||
|
if keyword.is_callable_kind_keyword() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if let Some(token_width) = self.callable_modifier_width_at(keyword, lookahead_offset) {
|
||||||
|
lookahead_offset += token_width;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn callable_modifier_width_at(
|
||||||
|
&mut self,
|
||||||
|
keyword: Keyword,
|
||||||
|
lookahead_token_offset: usize,
|
||||||
|
) -> Option<usize> {
|
||||||
|
if !keyword.is_callable_modifier() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
if matches!(keyword, Keyword::Native | Keyword::Config)
|
||||||
|
&& self.peek_token_at(lookahead_token_offset + 1) == Some(Token::LeftParenthesis)
|
||||||
|
&& self.peek_token_at(lookahead_token_offset + 3) == Some(Token::RightParenthesis)
|
||||||
|
{
|
||||||
|
// `native(...)` and `config(...)` consume a parenthesized specifier
|
||||||
|
// in modifier position, so lookahead must skip the whole modifier.
|
||||||
|
Some(4)
|
||||||
|
} else {
|
||||||
|
Some(1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
3
rottlib/src/parser/grammar/function/mod.rs
Normal file
3
rottlib/src/parser/grammar/function/mod.rs
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
mod definition;
|
||||||
|
mod lookahead;
|
||||||
|
mod params;
|
||||||
107
rottlib/src/parser/grammar/function/params.rs
Normal file
107
rottlib/src/parser/grammar/function/params.rs
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
use crate::arena::ArenaVec;
|
||||||
|
use crate::ast::{AstSpan, Parameter, ParameterModifier, ParameterModifierKind, ParameterRef};
|
||||||
|
use crate::lexer::{Keyword, Token};
|
||||||
|
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
// allowed to switch to result returning
|
||||||
|
pub(crate) fn parse_parameter_list(&mut self) -> ArenaVec<'arena, ParameterRef<'src, 'arena>> {
|
||||||
|
let mut params = self.arena.vec();
|
||||||
|
if matches!(self.peek_token(), Some(Token::RightParenthesis)) {
|
||||||
|
return params;
|
||||||
|
}
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let start_pos = self.last_consumed_position_or_start();
|
||||||
|
|
||||||
|
let mut modifiers = self.arena.vec();
|
||||||
|
|
||||||
|
while let Some((next_keyword, next_position)) = self.peek_keyword_and_position() {
|
||||||
|
match next_keyword {
|
||||||
|
Keyword::Optional => {
|
||||||
|
modifiers.push(ParameterModifier {
|
||||||
|
kind: ParameterModifierKind::Optional,
|
||||||
|
position: next_position,
|
||||||
|
});
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
Keyword::Out => {
|
||||||
|
modifiers.push(ParameterModifier {
|
||||||
|
kind: ParameterModifierKind::Out,
|
||||||
|
position: next_position,
|
||||||
|
});
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
Keyword::Coerce => {
|
||||||
|
modifiers.push(ParameterModifier {
|
||||||
|
kind: ParameterModifierKind::Coerce,
|
||||||
|
position: next_position,
|
||||||
|
});
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
Keyword::Skip => {
|
||||||
|
modifiers.push(ParameterModifier {
|
||||||
|
kind: ParameterModifierKind::Skip,
|
||||||
|
position: next_position,
|
||||||
|
});
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let type_spec = match self.parse_type_specifier() {
|
||||||
|
Ok(t) => t,
|
||||||
|
Err(e) => {
|
||||||
|
self.report_error(e);
|
||||||
|
self.recover_until(SyncLevel::ListSeparator);
|
||||||
|
if self.eat(Token::Comma) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let name = self
|
||||||
|
.parse_identifier(ParseErrorKind::ParamMissingIdentifier)
|
||||||
|
.unwrap_or_fallback(self);
|
||||||
|
|
||||||
|
let array_len = match self.parse_array_len_expr() {
|
||||||
|
Ok(v) => v,
|
||||||
|
Err(e) => {
|
||||||
|
self.report_error(e);
|
||||||
|
self.recover_until(SyncLevel::CloseBracket);
|
||||||
|
let _ = self.eat(Token::RightBracket);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let default_value = if self.eat(Token::Assign) {
|
||||||
|
Some(self.parse_expression())
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let span = AstSpan::range(start_pos, self.last_consumed_position_or_start());
|
||||||
|
params.push(self.arena.alloc_node(
|
||||||
|
Parameter {
|
||||||
|
modifiers,
|
||||||
|
type_specifier: type_spec,
|
||||||
|
name,
|
||||||
|
array_size: array_len,
|
||||||
|
default_value,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
));
|
||||||
|
|
||||||
|
if !self.eat(Token::Comma) || matches!(self.peek_token(), Some(Token::RightParenthesis))
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.ensure_forward_progress(start_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
params
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,119 +0,0 @@
|
|||||||
use crate::ast::DeclarationLiteral;
|
|
||||||
use crate::lexer::Token;
|
|
||||||
use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt};
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|
||||||
/// Parses a simple literal value that can be used inside FerUS's top-level
|
|
||||||
/// class members' definitions.
|
|
||||||
///
|
|
||||||
/// On success consumes exactly one token. If the next token is not
|
|
||||||
/// a supported literal, returns [`None`] and leaves the stream untouched.
|
|
||||||
pub(crate) fn parse_declaration_literal(
|
|
||||||
&mut self,
|
|
||||||
) -> ParseResult<crate::ast::DeclarationLiteralRef> {
|
|
||||||
let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
|
|
||||||
else {
|
|
||||||
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
|
||||||
};
|
|
||||||
let declaration_literal = match token {
|
|
||||||
Token::IntegerLiteral => {
|
|
||||||
let value = self
|
|
||||||
.decode_integer_literal(token_text)
|
|
||||||
.unwrap_or_fallback(self);
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::Integer(value)
|
|
||||||
}
|
|
||||||
Token::FloatLiteral => {
|
|
||||||
let value = self
|
|
||||||
.decode_float_literal(token_text)
|
|
||||||
.unwrap_or_fallback(self);
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::Float(value)
|
|
||||||
}
|
|
||||||
Token::StringLiteral => {
|
|
||||||
let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::String(value)
|
|
||||||
}
|
|
||||||
Token::True => {
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::Bool(true)
|
|
||||||
}
|
|
||||||
Token::False => {
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::Bool(false)
|
|
||||||
}
|
|
||||||
Token::None => {
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::None
|
|
||||||
}
|
|
||||||
Token::Identifier => {
|
|
||||||
self.advance();
|
|
||||||
DeclarationLiteral::Identifier(token_text)
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
return Err(self.make_error_here(ParseErrorKind::DeclarationLiteralUnexpectedToken));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
Ok((declaration_literal, token_location))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses an integer literal as [`i128`].
|
|
||||||
///
|
|
||||||
/// Expects a normalized decimal string with optional leading sign.
|
|
||||||
/// Does not accept base prefixes or digit separators.
|
|
||||||
///
|
|
||||||
/// [`i128`] type was chosen to cover FerUS's integer range so constant
|
|
||||||
/// folding remains precise.
|
|
||||||
pub(crate) fn decode_integer_literal(&mut self, text: &str) -> ParseResult<i128> {
|
|
||||||
text.parse::<i128>()
|
|
||||||
.map_err(|_| self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a float literal as [`f64`].
|
|
||||||
///
|
|
||||||
/// Expects a normalized decimal float (optional sign, decimal point,
|
|
||||||
/// optional exponent). Special values like `inf`/`NaN` are rejected.
|
|
||||||
pub(crate) fn decode_float_literal(&mut self, text: &str) -> ParseResult<f64> {
|
|
||||||
if let Ok(parsed_value) = text.parse::<f64>() {
|
|
||||||
Ok(parsed_value)
|
|
||||||
} else {
|
|
||||||
Err(self.make_error_here(ParseErrorKind::InvalidNumericLiteral))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Unescapes a tokenized string literal into an arena string.
|
|
||||||
///
|
|
||||||
/// Supported escapes: `\n`, `\t`, `\"`, `\\`.
|
|
||||||
/// Unknown escape sequences are preserved as-is (UnrealScript behavior).
|
|
||||||
///
|
|
||||||
/// Note: this function assumes `raw` is the token text without surrounding
|
|
||||||
/// quotes.
|
|
||||||
pub(crate) fn unescape_string_literal(
|
|
||||||
arena: &'arena crate::arena::Arena,
|
|
||||||
raw: &str,
|
|
||||||
) -> crate::arena::ArenaString<'arena> {
|
|
||||||
let mut buffer = String::with_capacity(raw.len());
|
|
||||||
let mut characters = raw.chars();
|
|
||||||
while let Some(next_character) = characters.next() {
|
|
||||||
if next_character == '\\' {
|
|
||||||
// The lexer never produces a trailing backslash in a string
|
|
||||||
// token, so there's always a following character to inspect.
|
|
||||||
if let Some(escaped_character) = characters.next() {
|
|
||||||
match escaped_character {
|
|
||||||
'n' => buffer.push('\n'),
|
|
||||||
't' => buffer.push('\t'),
|
|
||||||
'"' => buffer.push('"'),
|
|
||||||
'\\' => buffer.push('\\'),
|
|
||||||
// Simply leaving the escaped character as-is matches
|
|
||||||
// UnrealScript behavior.
|
|
||||||
other => buffer.push(other),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
buffer.push(next_character);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
arena.string(&buffer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,8 +1,15 @@
|
|||||||
mod block;
|
//! ## Naming conventions
|
||||||
mod control;
|
//!
|
||||||
mod flow;
|
//! Some naming conventions that might not be obvious:
|
||||||
mod literals;
|
//!
|
||||||
mod pratt;
|
//! - `*_tail` means the opening token or keyword has already been consumed.
|
||||||
mod precedence;
|
//! Tail parsers build the rest of the construct and usually return a total,
|
||||||
mod statements;
|
//! recovered result.
|
||||||
mod switch;
|
//! - `*_into` means the method extends an already parsed value or appends into
|
||||||
|
//! an existing output container.
|
||||||
|
|
||||||
|
mod class;
|
||||||
|
mod declarations;
|
||||||
|
mod expression;
|
||||||
|
mod function;
|
||||||
|
mod statement;
|
||||||
|
|||||||
@ -1,342 +0,0 @@
|
|||||||
//! Expression parsing for the language front-end.
|
|
||||||
//!
|
|
||||||
//! This module implements a Pratt-style parser for the language's expression
|
|
||||||
//! grammar, supporting:
|
|
||||||
//!
|
|
||||||
//! * Primary expressions (literals, identifiers, parenthesized expressions)
|
|
||||||
//! * Prefix operators
|
|
||||||
//! * Postfix operators
|
|
||||||
//! * Infix operators with precedence and associativity
|
|
||||||
//!
|
|
||||||
//! Parsing is driven by [`PrecedenceRank`], which controls how tightly
|
|
||||||
//! operators bind. Infix parsing uses the pair of binding powers returned by
|
|
||||||
//! [`super::precedence::infix_precedence_ranks`] to encode associativity.
|
|
||||||
//! The parser infrastructure supports both left- and right-associative
|
|
||||||
//! operators, but Fermented UnrealScript currently defines only
|
|
||||||
//! right-associative ones.
|
|
||||||
//!
|
|
||||||
//! ## See also
|
|
||||||
//!
|
|
||||||
//! - [`crate::parser::Parser::parse_expression`] - main entry point
|
|
||||||
//! - [`PrecedenceRank`] - operator binding strengths
|
|
||||||
//! - [`super::precedence`] - operator precedence definitions
|
|
||||||
|
|
||||||
use crate::ast::{Expression, ExpressionRef, NeedsSemi};
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
|
||||||
use crate::parser::{ParseErrorKind, ParseExpressionResult, ResultRecoveryExt, SyncLevel};
|
|
||||||
|
|
||||||
pub(crate) use super::precedence::PrecedenceRank;
|
|
||||||
|
|
||||||
// Pratt expression parser: entry point, prefix/primary, postfix and infix
// passes, plus block-item parsing shared with statement blocks.
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
    /// Parses an expression.
    pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> {
        self.parse_expression_with_precedence(PrecedenceRank::LOOSEST)
    }

    /// Parses an expression with operators of at least `min_precedence_rank`
    /// (as tight or tighter).
    fn parse_expression_with_precedence(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        // Intentional order: (1) prefix/primary, (2) postfix (tighter than
        // any infix), (3) infix. We don't run a second postfix pass;
        // `(a+b)!` works because the parenthesized sub-expression had its own
        // postfix pass before returning.
        let mut left_hand_side = self
            .parse_prefix_or_primary()
            .sync_error_until(self, SyncLevel::Expression)
            .unwrap_or_fallback(self);
        // Postfix operators are tighter than any infix ones
        left_hand_side = self.parse_postfix_into(left_hand_side);
        left_hand_side = self.parse_infix_into(left_hand_side, min_precedence_rank);
        left_hand_side
    }

    /// Parses a prefix or primary expression (Pratt parser's "nud" or
    /// null denotation).
    ///
    /// Errors with [`ParseErrorKind::UnexpectedEndOfFile`] if the stream ends
    /// before a valid prefix/primary.
    fn parse_prefix_or_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
        let Some((token, token_location)) = self.peek_token_and_location() else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };

        if let Ok(operator) = crate::ast::PrefixOperator::try_from(token) {
            self.advance();
            // Prefix operands are parsed at TIGHTEST rank, so a prefix
            // operator binds tighter than any infix operator.
            let right_hand_side = self.parse_expression_with_precedence(PrecedenceRank::TIGHTEST);
            Ok(Expression::new_prefix(
                self.arena,
                token_location,
                operator,
                right_hand_side,
            ))
        } else {
            self.parse_primary()
        }
    }

    /// Parses a primary expression: literals, identifiers, or a parenthesized
    /// sub-expression.
    ///
    /// # Errors
    ///
    /// [`ParseErrorKind::ExpressionUnexpectedToken`] if the next token
    /// cannot start a primary; [`ParseErrorKind::UnexpectedEndOfFile`]
    /// at end of input.
    fn parse_primary(&mut self) -> ParseExpressionResult<'src, 'arena> {
        // For diagnostics, we only advance *after* fully parsing the current
        // literal/token.
        let Some((token, token_text, token_location)) = self.peek_token_lexeme_and_location()
        else {
            return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
        };
        match token {
            Token::IntegerLiteral => {
                let value = self.decode_integer_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Integer(value), token_location))
            }
            Token::FloatLiteral => {
                let value = self.decode_float_literal(token_text)?;
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Float(value), token_location))
            }
            Token::StringLiteral => {
                let value = crate::parser::Parser::unescape_string_literal(self.arena, token_text);
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::String(value), token_location))
            }
            Token::True => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(true), token_location))
            }
            Token::False => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Bool(false), token_location))
            }
            Token::None => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::None, token_location))
            }
            Token::Identifier => {
                self.advance();
                Ok(self
                    .arena
                    .alloc_at(Expression::Identifier(token_text), token_location))
            }
            Token::LeftParenthesis => {
                self.advance();
                self.parse_parenthesized_expression_cont(token_location)
            }
            // Control-flow constructs are expressions in this grammar; each
            // `*_cont` parser continues after its keyword was consumed here.
            Token::If => {
                self.advance();
                Ok(self.parse_if_cont(token_location))
            }
            Token::While => {
                self.advance();
                Ok(self.parse_while_cont(token_location))
            }
            Token::Do => {
                self.advance();
                self.parse_do_until_cont(token_location)
            }
            Token::ForEach => {
                self.advance();
                Ok(self.parse_foreach_cont(token_location))
            }
            Token::For => {
                self.advance();
                self.parse_for_cont(token_location)
            }
            Token::Brace(crate::lexer::BraceKind::Normal) => {
                self.advance();
                Ok(self.parse_block_cont(token_location))
            }
            Token::Return => {
                self.advance();
                Ok(self.parse_return_cont(token_location))
            }
            Token::Break => {
                self.advance();
                Ok(self.parse_break_cont(token_location))
            }
            Token::Continue => {
                self.advance();
                Ok(self.arena.alloc_at(Expression::Continue, token_location))
            }
            Token::Goto => {
                self.advance();
                self.parse_goto_cont(token_location)
            }
            Token::Switch => {
                self.advance();
                self.parse_switch_cont(token_location)
            }
            _ => {
                // Unexpected token in expression.
                Err(self.make_error_here(ParseErrorKind::ExpressionUnexpectedToken))
            }
        }
    }

    /// Parses an expression in parentheses.
    ///
    /// Assumes the `(` was already consumed; its location is
    /// `left_parenthesis_location`.
    /// On success, allocates a [`Expression::Parentheses`] node with a span
    /// covering from `(` to `)`.
    ///
    /// Errors with [`ParseErrorKind::ExpressionMissingClosingParenthesis`] if
    /// a closing `)` is missing; the diagnostic is associated with
    /// the opening `(` via `left_parenthesis_location`.
    fn parse_parenthesized_expression_cont(
        &mut self,
        left_parenthesis_location: TokenLocation,
    ) -> ParseExpressionResult<'src, 'arena> {
        let inner_expression = self.parse_expression();
        let right_parenthesis_location = self
            .expect(
                Token::RightParenthesis,
                ParseErrorKind::ExpressionMissingClosingParenthesis,
            )
            .widen_error_span_from(left_parenthesis_location)
            .sync_error_at(self, SyncLevel::CloseParenthesis)?;
        Ok(self.arena.alloc_between(
            Expression::Parentheses(inner_expression),
            left_parenthesis_location,
            right_parenthesis_location,
        ))
    }

    /// Parses all postfix operators it can, creating a tree with
    /// `left_hand_side` as a child.
    fn parse_postfix_into(
        &mut self,
        mut left_hand_side: ExpressionRef<'src, 'arena>,
    ) -> ExpressionRef<'src, 'arena> {
        // Single peek that yields `(postfix_op, location)` so the postfix loop
        // can advance once per operator without extra matching/unwraps.
        while let Some((operator, operator_location)) = self.peek_postfix_with_location() {
            self.advance();
            left_hand_side =
                Expression::new_postfix(self.arena, left_hand_side, operator, operator_location);
        }
        left_hand_side
    }

    /// Parses infix operators binding at least as tight as
    /// `min_precedence_rank`.
    ///
    /// Associativity is encoded by
    /// [`super::precedence::infix_precedence_ranks`]: the right-hand
    /// side is parsed with `right_precedence_rank`, so `a - b - c` vs
    /// `a ^ b ^ c` associate correctly based on the pair
    /// `(left_rank, right_rank)`.
    ///
    /// Stops when the next operator is looser than `min_precedence_rank`.
    fn parse_infix_into(
        &mut self,
        mut left_hand_side: ExpressionRef<'src, 'arena>,
        min_precedence_rank: PrecedenceRank,
    ) -> ExpressionRef<'src, 'arena> {
        while let Some((operator, right_precedence_rank)) =
            self.peek_infix_at_least(min_precedence_rank)
        {
            self.advance();
            let right_hand_side = self.parse_expression_with_precedence(right_precedence_rank);
            left_hand_side =
                Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side);
        }
        left_hand_side
    }

    /// Returns the next postfix operator and its location if present.
    ///
    /// Helper to avoid peeking and mapping twice; used to drive the postfix
    /// loop without unwraps.
    fn peek_postfix_with_location(
        &mut self,
    ) -> Option<(crate::ast::PostfixOperator, TokenLocation)> {
        let Some((token, token_location)) = self.peek_token_and_location() else {
            return None;
        };
        let Ok(operator) = crate::ast::PostfixOperator::try_from(token) else {
            return None;
        };
        Some((operator, token_location))
    }

    /// If the next token is an infix operator with left binding power at least
    /// `min_precedence_rank`, returns its operator and precedence rank.
    ///
    /// Otherwise return [`None`].
    fn peek_infix_at_least(
        &mut self,
        min_precedence_rank: PrecedenceRank,
    ) -> Option<(crate::ast::InfixOperator, PrecedenceRank)> {
        let (left_precedence_rank, operator, right_precedence_rank) = self
            .peek_token()
            .and_then(super::precedence::infix_precedence_ranks)?;
        if left_precedence_rank.is_looser_than(min_precedence_rank) {
            return None;
        }
        Some((operator, right_precedence_rank))
    }

    /// Parses one item inside a `{ ... }` block.
    ///
    /// The item can be a statement (e.g. a variable declaration) or an
    /// expression. If the item is an expression without a following
    /// semicolon, it is returned as the block's current tail expression
    /// - the value considered to be the block's result. In well-formed
    /// code such a tail expression appears only at the very end of the block.
    ///
    /// This method never consumes the closing `}` and is only meant to be
    /// called while parsing inside a block.
    pub(crate) fn parse_block_item(
        &mut self,
        statements: &mut crate::arena::ArenaVec<'arena, crate::ast::StatementRef<'src, 'arena>>,
    ) -> Option<crate::ast::ExpressionRef<'src, 'arena>> {
        if let Some(mut next_statement) = self.parse_statement() {
            if next_statement.needs_semicolon() {
                // For statements we immediately know if lack of
                // semicolon is an issue
                if let Some(Token::Semicolon) = self.peek_token() {
                    next_statement.span_mut().to = self.peek_location();
                    self.advance(); // ';'
                } else {
                    self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterStatement);
                }
            }
            statements.push(next_statement);
        } else {
            let mut next_expression = self.parse_expression();
            if let Expression::Error = *next_expression {
                // A failed expression: resynchronize at a statement boundary
                // and stretch the error node's span over the skipped tokens.
                self.recover_until(SyncLevel::Statement);
                next_expression.span_mut().to = self.peek_location();
            }
            if let Some((Token::Semicolon, semicolon_location)) = self.peek_token_and_location() {
                self.advance(); // ;
                let span = crate::ast::AstSpan {
                    from: next_expression.span().from,
                    to: semicolon_location,
                };
                let expression_statement_node = self
                    .arena
                    .alloc(crate::ast::Statement::Expression(next_expression), span);
                statements.push(expression_statement_node);
            } else {
                return Some(next_expression);
            }
        }
        None
    }
}
|
|
||||||
@ -1,185 +0,0 @@
|
|||||||
//! Precedence tables for Fermented UnrealScript operators.
|
|
||||||
//!
|
|
||||||
//! These values don't follow the usual *binding power* convention for
|
|
||||||
//! a Pratt parser, where tighter binding corresponds to a larger number.
|
|
||||||
//! Here, the smaller the number, the tighter the binding power.
|
|
||||||
//! For this reason, we use the term *precedence rank* instead.
|
|
||||||
//!
|
|
||||||
//! ## Operators sorted by precedence (lowest number = tighter binding)
|
|
||||||
//!
|
|
||||||
//! ### Infix operators
|
|
||||||
//!
|
|
||||||
//! All infix operators in UnrealScript are
|
|
||||||
//! [left-associative](https://wiki.beyondunreal.com/Operators).
|
|
||||||
//!
|
|
||||||
//! 12: `**`
|
|
||||||
//! 16: `*`, `/`, `Cross`, `Dot`
|
|
||||||
//! 18: `%`
|
|
||||||
//! 20: `+`, `-`
|
|
||||||
//! 22: `<<`, `>>`, `>>>`
|
|
||||||
//! 24: `<`, `>`, `<=`, `>=`, `==`, `~=`, `ClockwiseFrom`
|
|
||||||
//! 26: `!=`
|
|
||||||
//! 28: `&`, `^`, `|`
|
|
||||||
//! 30: `&&`, `^^`
|
|
||||||
//! 32: `||`
|
|
||||||
//! 34: `*=`, `/=`, `+=`, `-=`
|
|
||||||
//! 40: `$`, `*`, `@`
|
|
||||||
//! 44: `$=`, `*=`, `@=`
|
|
||||||
//! 45: `-=`
|
|
||||||
//!
|
|
||||||
//! Some operators, such as `*`, appear twice with different precedence
|
|
||||||
//! ranks because they were defined with different values for different types
|
|
||||||
//! in separate script source files (as in the Killing Floor sources).
|
|
||||||
//! However, UnrealScript uses only the first definition it encounters in
|
|
||||||
//! `Object.uc`, which corresponds to the lower value.
|
|
||||||
//!
|
|
||||||
//! ### Prefix operators
|
|
||||||
//!
|
|
||||||
//! `!`, `~`, `-`, `++`, `--`.
|
|
||||||
//!
|
|
||||||
//! ### Postfix operators
|
|
||||||
//!
|
|
||||||
//! `++`, `--`.
|
|
||||||
|
|
||||||
use crate::ast::{InfixOperator, PostfixOperator, PrefixOperator};
|
|
||||||
use crate::lexer::Token;
|
|
||||||
|
|
||||||
/// Compact precedence rank used by the Pratt Parser.
///
/// A smaller number means tighter binding, and a larger number means looser
/// binding. This inverted scale matches how UnrealScript tables were recorded.
#[must_use]
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct PrecedenceRank(u8);

impl PrecedenceRank {
    /// The loosest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by the maximum [`u8`] value.
    pub const LOOSEST: Self = PrecedenceRank(u8::MAX);

    /// The tightest possible precedence rank.
    ///
    /// In this inverted scale (smaller number = tighter binding),
    /// this is represented by zero.
    pub const TIGHTEST: PrecedenceRank = PrecedenceRank(0);

    /// Returns `true` if `self` binds more loosely than `other`
    /// (i.e. `self`'s rank number is larger on this inverted scale).
    ///
    /// # Examples
    ///
    /// The fence below is `text` rather than a runnable doctest: the type is
    /// `pub(crate)` with a private field, so a doctest cannot construct it.
    ///
    /// ```text
    /// let a = PrecedenceRank(40);
    /// let b = PrecedenceRank(34);
    /// assert!(a.is_looser_than(b)); // 40 is looser than 34
    ///
    /// let c = PrecedenceRank(20);
    /// let d = PrecedenceRank(24);
    /// assert!(!c.is_looser_than(d)); // 20 is tighter than 24
    /// ```
    pub fn is_looser_than(self, other: Self) -> bool {
        self.0 > other.0
    }
}
|
|
||||||
|
|
||||||
impl TryFrom<Token> for PrefixOperator {
|
|
||||||
type Error = ();
|
|
||||||
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
|
||||||
use PrefixOperator::*;
|
|
||||||
Ok(match token {
|
|
||||||
Token::Not => Not,
|
|
||||||
Token::Minus => Minus,
|
|
||||||
Token::BitwiseNot => BitwiseNot,
|
|
||||||
Token::Increment => Increment,
|
|
||||||
Token::Decrement => Decrement,
|
|
||||||
_ => return Err(()),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TryFrom<Token> for PostfixOperator {
|
|
||||||
type Error = ();
|
|
||||||
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
|
||||||
use PostfixOperator::*;
|
|
||||||
Ok(match token {
|
|
||||||
Token::Increment => Increment,
|
|
||||||
Token::Decrement => Decrement,
|
|
||||||
_ => return Err(()),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Maps a token to its infix operator along with its left and right binding
/// ranks: `(left_precedence_rank, operator, right_precedence_rank)`.
///
/// Returns [`None`] if and only if `token` is not an infix operator.
pub(crate) fn infix_precedence_ranks(
    token: Token,
) -> Option<(PrecedenceRank, InfixOperator, PrecedenceRank)> {
    use crate::ast::InfixOperator::*;
    let (left_precedence_rank, operator) = match token {
        // 12: `**`
        Token::Exponentiation => (12, Exponentiation),
        // 16: `*`, `/`, `Cross`, `Dot` (left-assoc)
        Token::Multiply => (16, Multiply),
        Token::Divide => (16, Divide),
        Token::Cross => (16, Cross),
        Token::Dot => (16, Dot),
        // 18: `%`
        Token::Modulo => (18, Modulo),
        // 20: `+`, `-`
        Token::Plus => (20, Plus),
        Token::Minus => (20, Minus),
        // 22: `<<`, `>>`, `>>>`
        Token::LeftShift => (22, LeftShift),
        Token::RightShift => (22, RightShift),
        Token::LogicalRightShift => (22, LogicalRightShift),
        // 24: comparison operators
        Token::Less => (24, Less),
        Token::LessEqual => (24, LessEqual),
        Token::Greater => (24, Greater),
        Token::GreaterEqual => (24, GreaterEqual),
        Token::Equal => (24, Equal),
        Token::ApproximatelyEqual => (24, ApproximatelyEqual),
        Token::ClockwiseFrom => (24, ClockwiseFrom),
        // 26: `!=`
        Token::NotEqual => (26, NotEqual),
        // 28: bit-wise `&`, `^`, `|`
        Token::BitwiseAnd => (28, BitwiseAnd),
        Token::BitwiseXor => (28, BitwiseXor),
        Token::BitwiseOr => (28, BitwiseOr),
        // 30: logical `&&`, `^^`
        Token::And => (30, And),
        Token::Xor => (30, Xor),
        // 32: logical `||`
        Token::Or => (32, Or),
        // 34: `*=`, `/=`, `+=`, `-=`
        Token::MultiplyAssign => (34, MultiplyAssign),
        Token::DivideAssign => (34, DivideAssign),
        Token::PlusAssign => (34, PlusAssign),
        Token::MinusAssign => (34, MinusAssign),
        // Simple '=' treated with same precedence
        Token::Assign => (34, Assign),
        // `%=` shares the assignment tier as well.
        Token::ModuloAssign => (34, ModuloAssign),
        // 40: `$`, `@`
        Token::Concat => (40, Concat),
        Token::ConcatSpace => (40, ConcatSpace),
        // 44: `$=`, `@=`
        Token::ConcatAssign => (44, ConcatAssign),
        Token::ConcatSpaceAssign => (44, ConcatSpaceAssign),
        _ => return None,
    };
    // All operators are left-associative, so `right_precedence_rank` is set to
    // `left_binding_rank - 1` (with our "smaller is tighter" scale, this
    // enforces left associativity in Pratt parsing).
    //
    // Since all precedences are even, subtracting one won't actually cross
    // any boundary between operator groups.
    Some((
        PrecedenceRank(left_precedence_rank),
        operator,
        PrecedenceRank(left_precedence_rank - 1),
    ))
}
|
|
||||||
85
rottlib/src/parser/grammar/statement.rs
Normal file
85
rottlib/src/parser/grammar/statement.rs
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
//! Statement parsing for the language front-end.
|
||||||
|
//!
|
||||||
|
//! Implements a simple recursive-descent parser for
|
||||||
|
//! *Fermented `UnrealScript` statements*.
|
||||||
|
|
||||||
|
use crate::ast::{AstSpan, Statement, StatementRef};
|
||||||
|
use crate::lexer::{Keyword, Token};
|
||||||
|
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
|
||||||
|
|
||||||
|
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
||||||
|
/// Parses a single statement.
|
||||||
|
///
|
||||||
|
/// Does not consume a trailing `;` except for [`Statement::Empty`].
|
||||||
|
/// The caller handles semicolons. Returns [`Some`] if a statement is
|
||||||
|
/// recognized; otherwise [`None`].
|
||||||
|
#[must_use]
|
||||||
|
pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
|
||||||
|
let Some((token, lexeme, position)) = self.peek_token_lexeme_and_position() else {
|
||||||
|
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
|
||||||
|
match token {
|
||||||
|
// Empty statement
|
||||||
|
Token::Semicolon => {
|
||||||
|
self.advance(); // `;`
|
||||||
|
Some(
|
||||||
|
self.arena
|
||||||
|
.alloc_node(Statement::Empty, AstSpan::new(position)),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnrealScript `local` declaration
|
||||||
|
Token::Keyword(Keyword::Local) => {
|
||||||
|
self.advance(); // `local`
|
||||||
|
let start = position;
|
||||||
|
|
||||||
|
let type_spec = self.parse_type_specifier().unwrap_or_fallback(self);
|
||||||
|
let declarators = self.parse_variable_declarators();
|
||||||
|
// TODO: parse
|
||||||
|
|
||||||
|
let span = AstSpan::range(start, self.last_consumed_position_or_start());
|
||||||
|
Some(self.arena.alloc_node(
|
||||||
|
Statement::LocalVariableDeclaration {
|
||||||
|
type_spec,
|
||||||
|
declarators,
|
||||||
|
},
|
||||||
|
span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Label: Ident ':' (also tolerate Begin:/End:)
|
||||||
|
Token::Identifier | Token::Keyword(Keyword::Begin | Keyword::End)
|
||||||
|
if matches!(self.peek_token_at(1), Some(Token::Colon)) =>
|
||||||
|
{
|
||||||
|
self.advance(); // ident/begin/end
|
||||||
|
self.advance(); // :
|
||||||
|
Some(self.arena.alloc_node(
|
||||||
|
Statement::Label(self.arena.string(lexeme)),
|
||||||
|
AstSpan::range(position, self.last_consumed_position_or_start()),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Nested function/event/operator inside blocks
|
||||||
|
t if t == Token::Keyword(Keyword::Function)
|
||||||
|
|| t == Token::Keyword(Keyword::Event)
|
||||||
|
|| t.is_valid_function_modifier() =>
|
||||||
|
{
|
||||||
|
let f = self.parse_callable_definition();
|
||||||
|
|
||||||
|
let span = *f.span();
|
||||||
|
Some(self.arena.alloc_node(Statement::Function(f), span))
|
||||||
|
}
|
||||||
|
|
||||||
|
// C-like variable declaration starting with a TypeSpec
|
||||||
|
/*token if self.looks_like_variable_declaration_start(token) => Some(
|
||||||
|
self.parse_variable_declaration_start()
|
||||||
|
.sync_error_until(self, SyncLevel::Statement)
|
||||||
|
.unwrap_or_fallback(self),
|
||||||
|
),*/
|
||||||
|
// Not a statement
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -1,185 +0,0 @@
|
|||||||
//! Statement parsing for the language front-end.
|
|
||||||
//!
|
|
||||||
//! Implements a simple recursive-descent parser for
|
|
||||||
//! *Fermented UnrealScript statements*.
|
|
||||||
|
|
||||||
use crate::ast::{AstSpan, Statement, StatementRef};
|
|
||||||
use crate::lexer::Token;
|
|
||||||
use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel};
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|
||||||
/// Parses a single statement.
|
|
||||||
///
|
|
||||||
/// Does not consume a trailing `;` except for [`Statement::Empty`].
|
|
||||||
/// The caller handles semicolons. Returns [`Some`] if a statement is
|
|
||||||
/// recognized; otherwise [`None`].
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_statement(&mut self) -> Option<StatementRef<'src, 'arena>> {
|
|
||||||
let Some((token, lexeme, location)) = self.peek_token_lexeme_and_location() else {
|
|
||||||
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
|
|
||||||
return None;
|
|
||||||
};
|
|
||||||
match token {
|
|
||||||
// Empty statement
|
|
||||||
Token::Semicolon => {
|
|
||||||
self.advance(); // `;`
|
|
||||||
Some(self.arena.alloc(Statement::Empty, AstSpan::new(location)))
|
|
||||||
}
|
|
||||||
// UnrealScript's standard `local` variable declaration
|
|
||||||
Token::Local => {
|
|
||||||
self.advance(); // `local`
|
|
||||||
Some(
|
|
||||||
self.parse_local_variable_declaration_cont()
|
|
||||||
.widen_error_span_from(location)
|
|
||||||
.sync_error_until(self, SyncLevel::Statement)
|
|
||||||
.unwrap_or_fallback(self),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
// Label definition
|
|
||||||
Token::Identifier if matches!(self.peek_token_at(1), Some(Token::Colon)) => {
|
|
||||||
self.advance(); // `Token::Identifier`
|
|
||||||
self.advance(); // `:`
|
|
||||||
Some(self.arena.alloc(
|
|
||||||
Statement::Label(self.arena.string(lexeme)),
|
|
||||||
AstSpan::range(location, self.last_visited_location()),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
// C-like variable declaration
|
|
||||||
token
|
|
||||||
if token.is_valid_type_name_token()
|
|
||||||
&& Some(Token::Identifier) == self.peek_token_at(1) =>
|
|
||||||
{
|
|
||||||
self.advance(); // `TYPE_NAME`
|
|
||||||
// Next token is guaranteed to exist by the arm condition
|
|
||||||
Some(self.parse_variable_declaration_cont(lexeme))
|
|
||||||
}
|
|
||||||
// Not a statement
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a local variable declaration after `local` has been consumed.
|
|
||||||
///
|
|
||||||
/// Requires the next token to be a type name. Initializers are not allowed.
|
|
||||||
/// Reports and recovers from errors; the identifier list may be empty if
|
|
||||||
/// recovery fails.
|
|
||||||
fn parse_local_variable_declaration_cont(
|
|
||||||
&mut self,
|
|
||||||
) -> crate::parser::ParseResult<'src, 'arena, StatementRef<'src, 'arena>> {
|
|
||||||
let Some((type_token, type_name)) = self.peek_token_and_lexeme() else {
|
|
||||||
return Err(self.make_error_here(ParseErrorKind::UnexpectedEndOfFile));
|
|
||||||
};
|
|
||||||
if !type_token.is_valid_type_name_token() {
|
|
||||||
return Err(self.make_error_here(ParseErrorKind::LocalInvalidTypeName));
|
|
||||||
}
|
|
||||||
let declaration_start_location = self.last_visited_location();
|
|
||||||
self.advance(); // `TYPE_NAME`
|
|
||||||
|
|
||||||
let type_name = self.arena.string(type_name);
|
|
||||||
let identifiers = self.parse_local_identifier_list();
|
|
||||||
if identifiers.is_empty() {
|
|
||||||
self.make_error_here(ParseErrorKind::LocalMissingIdentifier)
|
|
||||||
.widen_error_span_from(declaration_start_location)
|
|
||||||
.report_error(self);
|
|
||||||
}
|
|
||||||
Ok(self.arena.alloc(
|
|
||||||
Statement::LocalVariableDeclaration {
|
|
||||||
type_name,
|
|
||||||
identifiers,
|
|
||||||
},
|
|
||||||
AstSpan::range(declaration_start_location, self.last_visited_location()),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a comma-separated list of identifiers for a local declaration.
|
|
||||||
///
|
|
||||||
/// Best-effort recovery from errors. Returns an empty list if no valid
|
|
||||||
/// identifiers are found.
|
|
||||||
fn parse_local_identifier_list(
|
|
||||||
&mut self,
|
|
||||||
) -> crate::arena::ArenaVec<'arena, crate::arena::ArenaString<'arena>> {
|
|
||||||
let mut identifiers = self.arena.vec();
|
|
||||||
while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
|
|
||||||
if token == Token::Identifier {
|
|
||||||
identifiers.push(self.arena.string(next_variable_name));
|
|
||||||
self.advance(); // `Token::Identifier`
|
|
||||||
} else {
|
|
||||||
self.report_error_here(ParseErrorKind::LocalBadVariableIdentifier);
|
|
||||||
// Try to recover to the next variable name
|
|
||||||
self.recover_until(SyncLevel::ListSeparator);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disallow initializers in `local`.
|
|
||||||
if let Some(Token::Assign) = self.peek_token() {
|
|
||||||
self.report_error_here(ParseErrorKind::LocalInitializerNotAllowed);
|
|
||||||
self.recover_until(SyncLevel::ListSeparator);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can the list continue?
|
|
||||||
// Loop cannot stall: each iteration consumes a token or breaks
|
|
||||||
if !self.eat(Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// End-of-file branch
|
|
||||||
identifiers
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a non-local variable declaration after the type name token
|
|
||||||
/// has been consumed.
|
|
||||||
///
|
|
||||||
/// The caller must guarantee that at least one declarator follows.
|
|
||||||
/// Optional initializers are allowed.
|
|
||||||
fn parse_variable_declaration_cont(
|
|
||||||
&mut self,
|
|
||||||
type_name: &'src str,
|
|
||||||
) -> StatementRef<'src, 'arena> {
|
|
||||||
let declaration_start_location = self.last_visited_location();
|
|
||||||
let type_name = self.arena.string(type_name);
|
|
||||||
let declarations = self.parse_variable_declaration_list();
|
|
||||||
// An identifier required by method's condition
|
|
||||||
debug_assert!(!declarations.is_empty());
|
|
||||||
self.arena.alloc(
|
|
||||||
Statement::VariableDeclaration {
|
|
||||||
type_name,
|
|
||||||
declarations,
|
|
||||||
},
|
|
||||||
AstSpan::range(declaration_start_location, self.last_visited_location()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a comma-separated list of declarators with optional `=`
|
|
||||||
/// initializers.
|
|
||||||
///
|
|
||||||
/// Best-effort recovery on errors.
|
|
||||||
/// The caller should invoke this when the next token starts a declarator.
|
|
||||||
fn parse_variable_declaration_list(
|
|
||||||
&mut self,
|
|
||||||
) -> crate::arena::ArenaVec<'arena, crate::ast::VariableDeclarator<'src, 'arena>> {
|
|
||||||
let mut variables = self.arena.vec();
|
|
||||||
while let Some((token, next_variable_name)) = self.peek_token_and_lexeme() {
|
|
||||||
if token == Token::Identifier {
|
|
||||||
self.advance(); // `Token::Identifier`
|
|
||||||
let name = self.arena.string(next_variable_name);
|
|
||||||
let initializer = if self.eat(Token::Assign) {
|
|
||||||
Some(self.parse_expression())
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
variables.push(crate::ast::VariableDeclarator { name, initializer });
|
|
||||||
} else {
|
|
||||||
self.report_error_here(ParseErrorKind::DeclBadVariableIdentifier);
|
|
||||||
// Try to recover to the next variable name
|
|
||||||
self.recover_until(SyncLevel::ListSeparator);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can the list continue?
|
|
||||||
// Loop cannot stall: each iteration consumes a token or breaks
|
|
||||||
if !self.eat(Token::Comma) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// End-of-file branch
|
|
||||||
variables
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,227 +0,0 @@
|
|||||||
use crate::arena::ArenaVec;
|
|
||||||
use crate::ast::{AstSpan, ExpressionRef, StatementRef};
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
|
||||||
use crate::parser::{ParseErrorKind, ResultRecoveryExt};
|
|
||||||
|
|
||||||
impl<'src, 'arena> crate::parser::Parser<'src, 'arena> {
|
|
||||||
/// Parses a `switch` expression after the `switch` keyword was consumed.
|
|
||||||
///
|
|
||||||
/// Arm bodies accept statements and expressions. A last, expression without
|
|
||||||
/// `;` in the last arm becomes the switch's tail value if none was
|
|
||||||
/// captured yet.
|
|
||||||
/// Only one `default` case arm is allowed.
|
|
||||||
/// Returns a best-effort switch node on premature EOF.
|
|
||||||
#[must_use]
|
|
||||||
pub(crate) fn parse_switch_cont(
|
|
||||||
&mut self,
|
|
||||||
switch_start_location: TokenLocation,
|
|
||||||
) -> crate::parser::ParseExpressionResult<'src, 'arena> {
|
|
||||||
let selector = self.parse_expression();
|
|
||||||
self.expect(
|
|
||||||
Token::Brace(crate::lexer::BraceKind::Normal),
|
|
||||||
ParseErrorKind::SwitchMissingBody,
|
|
||||||
)
|
|
||||||
.report_error(self);
|
|
||||||
let (mut cases, mut default_arm, mut tail) = (self.arena.vec(), None, None);
|
|
||||||
let mut span = AstSpan::new(switch_start_location);
|
|
||||||
loop {
|
|
||||||
let Some((token, token_location)) = self.peek_token_and_location() else {
|
|
||||||
self.report_error_here(ParseErrorKind::UnexpectedEndOfFile);
|
|
||||||
span.extend_to(self.peek_location());
|
|
||||||
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
|
|
||||||
};
|
|
||||||
match token {
|
|
||||||
Token::RightBrace => {
|
|
||||||
self.advance(); // '}'
|
|
||||||
span.extend_to(token_location);
|
|
||||||
return Ok(self.alloc_switch_node(selector, cases, default_arm, tail, span));
|
|
||||||
}
|
|
||||||
Token::Case => {
|
|
||||||
if default_arm.is_some() {
|
|
||||||
self.report_error_here(ParseErrorKind::SwitchCasesAfterDefault);
|
|
||||||
}
|
|
||||||
let case_node = self.parse_switch_case_group(token_location, &mut tail);
|
|
||||||
cases.push(case_node);
|
|
||||||
}
|
|
||||||
Token::Default => {
|
|
||||||
if default_arm.is_some() {
|
|
||||||
self.report_error_here(ParseErrorKind::SwitchDuplicateDefault);
|
|
||||||
}
|
|
||||||
// We still parse a duplicate default to surface all errors.
|
|
||||||
// Bodies are effectively fused for error reporting;
|
|
||||||
// compilation stops anyway, so this trades AST correctness
|
|
||||||
// for diagnostics.
|
|
||||||
self.parse_switch_default_arm(
|
|
||||||
token_location,
|
|
||||||
default_arm.get_or_insert_with(|| self.arena.vec()),
|
|
||||||
&mut tail,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// This can only be triggered before parsing any `case` or
|
|
||||||
// `default` arms, since they stop either at the start of
|
|
||||||
// another arm declaration (e.g. at `case`/`default`) or
|
|
||||||
// at the `}` that ends switch body.
|
|
||||||
_ => self.parse_switch_preamble_items(&mut tail),
|
|
||||||
}
|
|
||||||
// Ensure forward progress under errors to avoid infinite loops.
|
|
||||||
if self.peek_location() <= token_location {
|
|
||||||
self.advance();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses a stacked `case` group and its body:
|
|
||||||
/// `case <expr>: (case <expr>:)* <arm-body-until-boundary>`.
|
|
||||||
///
|
|
||||||
/// Returns the allocated [`crate::ast::CaseRef`] node.
|
|
||||||
#[must_use]
|
|
||||||
fn parse_switch_case_group(
|
|
||||||
&mut self,
|
|
||||||
first_case_location: TokenLocation,
|
|
||||||
tail: &mut Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
) -> crate::ast::CaseRef<'src, 'arena> {
|
|
||||||
let mut labels = self.arena.vec();
|
|
||||||
while let Some((Token::Case, case_location)) = self.peek_token_and_location() {
|
|
||||||
// Guaranteed progress: we entered on `Token::Case`.
|
|
||||||
self.advance(); // 'case'
|
|
||||||
labels.push(self.parse_expression());
|
|
||||||
|
|
||||||
// Enforce `:` after each case with statement-level recovery.
|
|
||||||
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
|
||||||
.widen_error_span_from(case_location)
|
|
||||||
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
|
||||||
.report_error(self);
|
|
||||||
}
|
|
||||||
let mut body = self.arena.vec();
|
|
||||||
self.parse_switch_arm_body(&mut body, tail);
|
|
||||||
let case_span = compute_case_span(first_case_location, &labels, &body);
|
|
||||||
self.arena
|
|
||||||
.alloc(crate::ast::SwitchCase { labels, body }, case_span)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses the `default :` arm and its body.
|
|
||||||
///
|
|
||||||
/// Does not consume a boundary token after the body.
|
|
||||||
fn parse_switch_default_arm(
|
|
||||||
&mut self,
|
|
||||||
default_location: TokenLocation,
|
|
||||||
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
|
||||||
tail: &mut Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
) {
|
|
||||||
self.advance(); // 'default'
|
|
||||||
self.expect(Token::Colon, ParseErrorKind::SwitchCaseMissingColon)
|
|
||||||
.widen_error_span_from(default_location)
|
|
||||||
.sync_error_until(self, crate::parser::SyncLevel::Statement)
|
|
||||||
.report_error(self);
|
|
||||||
self.parse_switch_arm_body(statements, tail);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses items of a single switch arm body until a boundary token or EOF.
|
|
||||||
///
|
|
||||||
/// Boundary tokens: `case`, `default`, `}`.
|
|
||||||
fn parse_switch_arm_body(
|
|
||||||
&mut self,
|
|
||||||
statements: &mut ArenaVec<'arena, StatementRef<'src, 'arena>>,
|
|
||||||
tail: &mut Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
) {
|
|
||||||
// No need to report end-of-file as it'll be done by
|
|
||||||
// `parse_switch_cont`.
|
|
||||||
while let Some((token, token_location)) = self.peek_token_and_location() {
|
|
||||||
match token {
|
|
||||||
// Complain about tail instruction if `switch` body
|
|
||||||
// doesn't end here
|
|
||||||
Token::Case | Token::Default => {
|
|
||||||
if let Some(tail_expression) = tail.take() {
|
|
||||||
self.report_error_here(ParseErrorKind::SwitchBareExpressionBeforeNextArm);
|
|
||||||
let span = *tail_expression.span();
|
|
||||||
let stmt = self
|
|
||||||
.arena
|
|
||||||
.alloc(crate::ast::Statement::Expression(tail_expression), span);
|
|
||||||
statements.push(stmt);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Token::RightBrace => break,
|
|
||||||
_ => (),
|
|
||||||
}
|
|
||||||
// We know that at this point:
|
|
||||||
// 1. There is still a token and it is not EOF;
|
|
||||||
// 2. It isn't end of the block.
|
|
||||||
// So having a tail statement there is a problem!
|
|
||||||
if let Some(tail_expression) = tail.take() {
|
|
||||||
self.report_error_here(ParseErrorKind::BlockMissingSemicolonAfterExpression);
|
|
||||||
let tail_span = *tail_expression.span();
|
|
||||||
let node = self.arena.alloc(
|
|
||||||
crate::ast::Statement::Expression(tail_expression),
|
|
||||||
tail_span,
|
|
||||||
);
|
|
||||||
statements.push(node);
|
|
||||||
}
|
|
||||||
*tail = self.parse_block_item(statements);
|
|
||||||
// Ensure forward progress under errors to avoid infinite loops.
|
|
||||||
if self.peek_location() <= token_location {
|
|
||||||
self.advance();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parses items that were found in code *before* any arm (`case`/`default`)
|
|
||||||
/// declaration.
|
|
||||||
///
|
|
||||||
/// These aren't allowed, but we still want to perform a proper parsing step
|
|
||||||
/// to report whatever errors we can in case programmer simply forgot to put
|
|
||||||
/// an arm declaration.
|
|
||||||
///
|
|
||||||
/// Boundary tokens: `case`, `default`, `}`.
|
|
||||||
fn parse_switch_preamble_items(&mut self, tail: &mut Option<ExpressionRef<'src, 'arena>>) {
|
|
||||||
// Report the spurious token.
|
|
||||||
self.report_error_here(ParseErrorKind::SwitchTopLevelItemNotCase);
|
|
||||||
|
|
||||||
// Discard parsed statements into a sink vector.
|
|
||||||
// This is a bit "hacky", but I don't want to adapt code to skip
|
|
||||||
// production of AST nodes just to report errors in
|
|
||||||
// one problematic case.
|
|
||||||
let mut sink = self.arena.vec();
|
|
||||||
self.parse_switch_arm_body(&mut sink, tail);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Helper to allocate a `Switch` expression with the given span.
|
|
||||||
#[must_use]
|
|
||||||
fn alloc_switch_node(
|
|
||||||
&mut self,
|
|
||||||
selector: ExpressionRef<'src, 'arena>,
|
|
||||||
cases: ArenaVec<'arena, crate::ast::CaseRef<'src, 'arena>>,
|
|
||||||
default_arm: Option<ArenaVec<'arena, StatementRef<'src, 'arena>>>,
|
|
||||||
tail: Option<ExpressionRef<'src, 'arena>>,
|
|
||||||
span: AstSpan,
|
|
||||||
) -> ExpressionRef<'src, 'arena> {
|
|
||||||
self.arena.alloc(
|
|
||||||
crate::ast::Expression::Switch {
|
|
||||||
selector,
|
|
||||||
cases,
|
|
||||||
default_arm,
|
|
||||||
tail,
|
|
||||||
},
|
|
||||||
span,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Computes [`AstSpan`] covering all labels and the body.
|
|
||||||
#[must_use]
|
|
||||||
fn compute_case_span(
|
|
||||||
labels_start_location: TokenLocation,
|
|
||||||
labels: &[ExpressionRef],
|
|
||||||
body: &[StatementRef],
|
|
||||||
) -> AstSpan {
|
|
||||||
let mut span = AstSpan {
|
|
||||||
from: labels_start_location,
|
|
||||||
to: labels_start_location,
|
|
||||||
};
|
|
||||||
if let Some(last_statement) = body.last() {
|
|
||||||
span.extend_to(last_statement.span().to);
|
|
||||||
} else if let Some(last_label) = labels.last() {
|
|
||||||
span.extend_to(last_label.span().to);
|
|
||||||
}
|
|
||||||
span
|
|
||||||
}
|
|
||||||
@ -1,8 +1,8 @@
|
|||||||
//! Parser for Fermented UnrealScript (FerUS).
|
//! Parser for Fermented `UnrealScript` (`FerUS`).
|
||||||
//!
|
//!
|
||||||
//! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST
|
//! Consumes tokens from [`crate::lexer::TokenizedFile`] and allocates AST
|
||||||
//! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser;
|
//! nodes in [`crate::arena::Arena`]. Basic expressions use a Pratt parser;
|
||||||
//! the rest rely on recursive descent in [`crate::parser::grammar`].
|
//! the rest rely on recursive descent in [`crate::parser::grammar`].\
|
||||||
//! Non-fatal errors accumulate in `Parser::diagnostics` as
|
//! Non-fatal errors accumulate in `Parser::diagnostics` as
|
||||||
//! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by
|
//! [`crate::diagnostics::Diagnostic`]; recovery skips to sync points defined by
|
||||||
//! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while
|
//! [`crate::parser::recovery::SyncLevel`] and synthesizes error nodes while
|
||||||
@ -27,17 +27,14 @@
|
|||||||
|
|
||||||
use super::lexer;
|
use super::lexer;
|
||||||
|
|
||||||
pub use lexer::{TokenPiece, Tokens};
|
pub use lexer::{TokenData, Tokens};
|
||||||
|
|
||||||
mod cursor;
|
mod cursor;
|
||||||
mod errors;
|
mod errors;
|
||||||
mod grammar;
|
mod grammar;
|
||||||
pub mod pretty;
|
|
||||||
mod recovery;
|
mod recovery;
|
||||||
mod trivia;
|
mod trivia;
|
||||||
|
|
||||||
pub use pretty::{ExprTree, StmtTree};
|
|
||||||
|
|
||||||
pub use errors::ParseError;
|
pub use errors::ParseError;
|
||||||
pub(crate) use errors::{ParseErrorKind, ParseResult};
|
pub(crate) use errors::{ParseErrorKind, ParseResult};
|
||||||
pub(crate) use recovery::{ResultRecoveryExt, SyncLevel};
|
pub(crate) use recovery::{ResultRecoveryExt, SyncLevel};
|
||||||
@ -50,8 +47,8 @@ pub type ParseExpressionResult<'src, 'arena> =
|
|||||||
pub struct Parser<'src, 'arena> {
|
pub struct Parser<'src, 'arena> {
|
||||||
arena: &'arena crate::arena::Arena,
|
arena: &'arena crate::arena::Arena,
|
||||||
pub diagnostics: Vec<crate::diagnostics::Diagnostic>,
|
pub diagnostics: Vec<crate::diagnostics::Diagnostic>,
|
||||||
cursor: cursor::CursorComponent<'src>,
|
cursor: cursor::Cursor<'src, 'src>,
|
||||||
trivia: trivia::TriviaComponent<'src>,
|
trivia: trivia::TriviaIndexBuilder<'src>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> Parser<'src, 'arena> {
|
impl<'src, 'arena> Parser<'src, 'arena> {
|
||||||
@ -59,8 +56,8 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
|||||||
Self {
|
Self {
|
||||||
arena,
|
arena,
|
||||||
diagnostics: Vec::new(),
|
diagnostics: Vec::new(),
|
||||||
cursor: cursor::CursorComponent::new(file),
|
cursor: cursor::Cursor::new(file),
|
||||||
trivia: trivia::TriviaComponent::default(),
|
trivia: trivia::TriviaIndexBuilder::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,353 +0,0 @@
|
|||||||
use crate::ast::{Expression, Statement, SwitchCase, VariableDeclarator};
|
|
||||||
use core::fmt;
|
|
||||||
|
|
||||||
/// A borrow of either a statement or an expression node,
|
|
||||||
/// plus helpers to enrich the printed tree.
|
|
||||||
enum AnyNode<'src, 'a, 'b> {
|
|
||||||
Stmt(&'b Statement<'src, 'a>),
|
|
||||||
Expr(&'b Expression<'src, 'a>),
|
|
||||||
Case(&'b SwitchCase<'src, 'a>),
|
|
||||||
/// A leaf line with a preformatted label (e.g., variable names).
|
|
||||||
Text(String),
|
|
||||||
/// Wraps a child with a tag like "cond", "body", "else", "init".
|
|
||||||
Tagged(&'static str, Box<AnyNode<'src, 'a, 'b>>),
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Public wrappers to print trees starting from either kind of node.
|
|
||||||
pub struct StmtTree<'src, 'a, 'b>(pub &'b Statement<'src, 'a>);
|
|
||||||
pub struct ExprTree<'src, 'a, 'b>(pub &'b Expression<'src, 'a>);
|
|
||||||
|
|
||||||
impl<'src, 'a, 'b> fmt::Display for StmtTree<'src, 'a, 'b> {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
fmt_node(AnyNode::Stmt(self.0), f, "", true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<'src, 'a, 'b> fmt::Display for ExprTree<'src, 'a, 'b> {
|
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
||||||
fmt_node(AnyNode::Expr(self.0), f, "", true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn fmt_node<'src, 'a, 'b>(
|
|
||||||
node: AnyNode<'src, 'a, 'b>,
|
|
||||||
f: &mut fmt::Formatter<'_>,
|
|
||||||
prefix: &str,
|
|
||||||
is_last: bool,
|
|
||||||
) -> fmt::Result {
|
|
||||||
write!(f, "{}{}─ ", prefix, if is_last { "└" } else { "├" })?;
|
|
||||||
writeln!(f, "{}", label(&node))?;
|
|
||||||
|
|
||||||
let new_prefix = format!("{}{}", prefix, if is_last { " " } else { "│ " });
|
|
||||||
let kids = children(node);
|
|
||||||
let len = kids.len();
|
|
||||||
for (i, child) in kids.into_iter().enumerate() {
|
|
||||||
let last = i + 1 == len;
|
|
||||||
fmt_node(child, f, &new_prefix, last)?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ----- Labeling -----
|
|
||||||
|
|
||||||
fn label<'src, 'a, 'b>(node: &AnyNode<'src, 'a, 'b>) -> String {
|
|
||||||
match node {
|
|
||||||
AnyNode::Expr(e) => expr_label(e),
|
|
||||||
AnyNode::Stmt(s) => stmt_label(s),
|
|
||||||
AnyNode::Case(c) => case_label(c),
|
|
||||||
AnyNode::Text(s) => s.clone(),
|
|
||||||
AnyNode::Tagged(tag, inner) => format!("{tag}: {}", label(inner)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn quote_str(s: &str) -> String {
|
|
||||||
let mut out = String::with_capacity(s.len() + 2);
|
|
||||||
out.push('"');
|
|
||||||
for ch in s.chars() {
|
|
||||||
match ch {
|
|
||||||
'\\' => out.push_str("\\\\"),
|
|
||||||
'"' => out.push_str("\\\""),
|
|
||||||
'\n' => out.push_str("\\n"),
|
|
||||||
'\r' => out.push_str("\\r"),
|
|
||||||
'\t' => out.push_str("\\t"),
|
|
||||||
c => out.push(c),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out.push('"');
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
fn expr_label<'src, 'a>(e: &Expression<'src, 'a>) -> String {
|
|
||||||
match e {
|
|
||||||
Expression::Binary(_, op, _) => format!("Binary {op}"),
|
|
||||||
Expression::LeftUnary(op, _) => format!("UnaryL {op}"),
|
|
||||||
Expression::RightUnary(_, op) => format!("UnaryR {op}"),
|
|
||||||
|
|
||||||
Expression::Identifier(s) => format!("Ident {s}"),
|
|
||||||
Expression::String(s) => {
|
|
||||||
// Avoid assuming ArenaString exposes &str; go via Display -> String.
|
|
||||||
format!("String {}", quote_str(&s.to_string()))
|
|
||||||
}
|
|
||||||
Expression::Integer(i) => format!("Int {i}"),
|
|
||||||
Expression::Float(x) => format!("Float {x}"),
|
|
||||||
Expression::Bool(true) => "Bool true".into(),
|
|
||||||
Expression::Bool(false) => "Bool false".into(),
|
|
||||||
Expression::None => "None".into(),
|
|
||||||
Expression::Parentheses(_) => "Parentheses".into(),
|
|
||||||
|
|
||||||
Expression::Block { statements, tail } => {
|
|
||||||
let n = statements.len() + usize::from(tail.is_some());
|
|
||||||
let tail_s = if tail.is_some() { " tail" } else { "" };
|
|
||||||
format!("BlockExpr ({n} items{tail_s})")
|
|
||||||
}
|
|
||||||
Expression::If { .. } => "IfExpr".into(),
|
|
||||||
Expression::While { .. } => "WhileExpr".into(),
|
|
||||||
Expression::DoUntil { .. } => "DoUntilExpr".into(),
|
|
||||||
Expression::ForEach { .. } => "ForEachExpr".into(),
|
|
||||||
Expression::For { .. } => "ForExpr".into(),
|
|
||||||
Expression::Switch {
|
|
||||||
cases,
|
|
||||||
default_arm: default,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
let d = if default.is_some() { " yes" } else { " no" };
|
|
||||||
format!("SwitchExpr cases={} default:{}", cases.len(), d)
|
|
||||||
}
|
|
||||||
Expression::Goto(label) => format!("Goto {}", label.to_string()),
|
|
||||||
Expression::Continue => "Continue".into(),
|
|
||||||
Expression::Break(Some(_)) => "Break value".into(),
|
|
||||||
Expression::Break(None) => "Break".into(),
|
|
||||||
Expression::Return(Some(_)) => "Return value".into(),
|
|
||||||
Expression::Return(None) => "Return".into(),
|
|
||||||
|
|
||||||
Expression::Error => "Error".into(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ----- Children collection -----
|
|
||||||
|
|
||||||
fn children<'src, 'a, 'b>(node: AnyNode<'src, 'a, 'b>) -> Vec<AnyNode<'src, 'a, 'b>> {
|
|
||||||
match node {
|
|
||||||
AnyNode::Expr(e) => expr_children(e),
|
|
||||||
AnyNode::Stmt(s) => stmt_children(s),
|
|
||||||
AnyNode::Case(c) => case_children(c),
|
|
||||||
AnyNode::Text(_) => vec![],
|
|
||||||
AnyNode::Tagged(_, inner) => children(*inner),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Expression children can include statements inside Block/Switch.
|
|
||||||
fn expr_children<'src, 'a, 'b>(e: &'b Expression<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
|
|
||||||
match e {
|
|
||||||
// Purely expression subtrees
|
|
||||||
Expression::Binary(lhs, _, rhs) => vec![AnyNode::Expr(&*lhs), AnyNode::Expr(&*rhs)],
|
|
||||||
Expression::LeftUnary(_, expr) => vec![AnyNode::Expr(&*expr)],
|
|
||||||
Expression::RightUnary(expr, _) => vec![AnyNode::Expr(&*expr)],
|
|
||||||
Expression::Parentheses(expr) => vec![AnyNode::Expr(&*expr)],
|
|
||||||
|
|
||||||
// Structured expression forms
|
|
||||||
Expression::Block { statements, tail } => {
|
|
||||||
let mut out: Vec<AnyNode<'src, 'a, 'b>> = statements
|
|
||||||
.iter()
|
|
||||||
.map(|s| AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*s))))
|
|
||||||
.collect();
|
|
||||||
if let Some(t) = tail.as_ref() {
|
|
||||||
out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::If {
|
|
||||||
condition,
|
|
||||||
body,
|
|
||||||
else_body,
|
|
||||||
} => {
|
|
||||||
let mut out = vec![
|
|
||||||
AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
|
|
||||||
AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
|
|
||||||
];
|
|
||||||
if let Some(e) = else_body {
|
|
||||||
out.push(AnyNode::Tagged("else", Box::new(AnyNode::Expr(&*e))));
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::While { condition, body } => vec![
|
|
||||||
AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*condition))),
|
|
||||||
AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
|
|
||||||
],
|
|
||||||
|
|
||||||
Expression::DoUntil { condition, body } => vec![
|
|
||||||
AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
|
|
||||||
AnyNode::Tagged("until", Box::new(AnyNode::Expr(&*condition))),
|
|
||||||
],
|
|
||||||
|
|
||||||
Expression::ForEach { iterator, body } => vec![
|
|
||||||
AnyNode::Tagged("iter", Box::new(AnyNode::Expr(&*iterator))),
|
|
||||||
AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))),
|
|
||||||
],
|
|
||||||
|
|
||||||
Expression::For {
|
|
||||||
init,
|
|
||||||
condition,
|
|
||||||
step,
|
|
||||||
body,
|
|
||||||
} => {
|
|
||||||
let mut out = Vec::with_capacity(4);
|
|
||||||
if let Some(i) = init {
|
|
||||||
out.push(AnyNode::Tagged("init", Box::new(AnyNode::Expr(&*i))));
|
|
||||||
}
|
|
||||||
if let Some(c) = condition {
|
|
||||||
out.push(AnyNode::Tagged("cond", Box::new(AnyNode::Expr(&*c))));
|
|
||||||
}
|
|
||||||
if let Some(s) = step {
|
|
||||||
out.push(AnyNode::Tagged("step", Box::new(AnyNode::Expr(&*s))));
|
|
||||||
}
|
|
||||||
out.push(AnyNode::Tagged("body", Box::new(AnyNode::Expr(&*body))));
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
Expression::Switch {
|
|
||||||
selector,
|
|
||||||
cases,
|
|
||||||
default_arm: default,
|
|
||||||
tail,
|
|
||||||
} => {
|
|
||||||
let mut out: Vec<AnyNode<'src, 'a, 'b>> = vec![AnyNode::Tagged(
|
|
||||||
"selector",
|
|
||||||
Box::new(AnyNode::Expr(&*selector)),
|
|
||||||
)];
|
|
||||||
|
|
||||||
for case in cases.iter() {
|
|
||||||
out.push(AnyNode::Tagged("case", Box::new(AnyNode::Case(&*case))));
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(d) = default.as_ref() {
|
|
||||||
for stmt in d.iter() {
|
|
||||||
out.push(AnyNode::Tagged("default", Box::new(AnyNode::Stmt(&*stmt))));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(t) = tail.as_ref() {
|
|
||||||
out.push(AnyNode::Tagged("tail", Box::new(AnyNode::Expr(&*t))));
|
|
||||||
}
|
|
||||||
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
// Leaves
|
|
||||||
Expression::Identifier(_)
|
|
||||||
| Expression::String(_)
|
|
||||||
| Expression::Integer(_)
|
|
||||||
| Expression::Float(_)
|
|
||||||
| Expression::Bool(_)
|
|
||||||
| Expression::None
|
|
||||||
| Expression::Goto(_)
|
|
||||||
| Expression::Continue
|
|
||||||
| Expression::Break(None)
|
|
||||||
| Expression::Return(None)
|
|
||||||
| Expression::Error => vec![],
|
|
||||||
|
|
||||||
// Single optional-child leaves
|
|
||||||
Expression::Break(Some(v)) => vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))],
|
|
||||||
Expression::Return(Some(v)) => {
|
|
||||||
vec![AnyNode::Tagged("value", Box::new(AnyNode::Expr(&*v)))]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn stmt_label<'src, 'a>(s: &Statement<'src, 'a>) -> String {
|
|
||||||
use Statement::*;
|
|
||||||
match s {
|
|
||||||
Empty => "Empty ;".into(),
|
|
||||||
Expression(_) => "Expression".into(),
|
|
||||||
|
|
||||||
LocalVariableDeclaration {
|
|
||||||
type_name,
|
|
||||||
identifiers: variable_names,
|
|
||||||
} => {
|
|
||||||
let count = variable_names.len();
|
|
||||||
let names = variable_names
|
|
||||||
.iter()
|
|
||||||
.map(|n| n.to_string())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(", ");
|
|
||||||
format!("LocalVarDecl type={type_name} count={count} names=[{names}]")
|
|
||||||
}
|
|
||||||
|
|
||||||
VariableDeclaration {
|
|
||||||
type_name,
|
|
||||||
declarations: variable_names,
|
|
||||||
} => {
|
|
||||||
let total = variable_names.len();
|
|
||||||
let inits = variable_names
|
|
||||||
.iter()
|
|
||||||
.filter(|v| v.initializer.is_some())
|
|
||||||
.count();
|
|
||||||
let names = variable_names
|
|
||||||
.iter()
|
|
||||||
.map(|VariableDeclarator { name, .. }| name.to_string())
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.join(", ");
|
|
||||||
format!("VarDecl type={type_name} vars={total} inits={inits} names=[{names}]")
|
|
||||||
}
|
|
||||||
|
|
||||||
Label(name) => format!("Label {name}"),
|
|
||||||
|
|
||||||
Error => "Error".into(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn stmt_children<'src, 'a, 'b>(s: &'b Statement<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
|
|
||||||
use Statement::*;
|
|
||||||
match s {
|
|
||||||
Empty | Label(_) | Error => vec![],
|
|
||||||
|
|
||||||
Expression(expr) => vec![AnyNode::Expr(&*expr)],
|
|
||||||
|
|
||||||
LocalVariableDeclaration {
|
|
||||||
identifiers: variable_names,
|
|
||||||
..
|
|
||||||
} => variable_names
|
|
||||||
.iter()
|
|
||||||
.map(|n| AnyNode::Text(format!("name: {n}")))
|
|
||||||
.collect(),
|
|
||||||
|
|
||||||
VariableDeclaration {
|
|
||||||
declarations: variable_names,
|
|
||||||
..
|
|
||||||
} => {
|
|
||||||
let mut out = Vec::new();
|
|
||||||
for VariableDeclarator {
|
|
||||||
name,
|
|
||||||
initializer: initial_value,
|
|
||||||
} in variable_names.iter()
|
|
||||||
{
|
|
||||||
out.push(AnyNode::Text(format!("var: {name}")));
|
|
||||||
if let Some(init_expr) = initial_value {
|
|
||||||
out.push(AnyNode::Tagged(
|
|
||||||
"init",
|
|
||||||
Box::new(AnyNode::Expr(&*init_expr)),
|
|
||||||
));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn case_children<'src, 'a, 'b>(c: &'b SwitchCase<'src, 'a>) -> Vec<AnyNode<'src, 'a, 'b>> {
|
|
||||||
let mut out = Vec::new();
|
|
||||||
for lbl in c.labels.iter() {
|
|
||||||
out.push(AnyNode::Tagged("label", Box::new(AnyNode::Expr(&*lbl))));
|
|
||||||
}
|
|
||||||
for stmt in c.body.iter() {
|
|
||||||
out.push(AnyNode::Tagged("stmt", Box::new(AnyNode::Stmt(&*stmt))));
|
|
||||||
}
|
|
||||||
out
|
|
||||||
}
|
|
||||||
|
|
||||||
fn case_label<'src, 'a>(c: &SwitchCase<'src, 'a>) -> String {
|
|
||||||
let l = c.labels.len();
|
|
||||||
let b = c.body.len();
|
|
||||||
format!("Case labels={l} body_items={b}")
|
|
||||||
}
|
|
||||||
@ -8,88 +8,186 @@
|
|||||||
//! General idea is that any method that returns something other than an error
|
//! General idea is that any method that returns something other than an error
|
||||||
//! can be assumed to have reported it.
|
//! can be assumed to have reported it.
|
||||||
|
|
||||||
use crate::lexer::{Token, TokenLocation};
|
use crate::ast::{AstSpan, CallableKind, IdentifierToken, QualifiedIdentifier};
|
||||||
|
use crate::diagnostics::Diagnostic;
|
||||||
|
use crate::lexer::{Token, TokenPosition};
|
||||||
use crate::parser::{ParseError, ParseResult, Parser};
|
use crate::parser::{ParseError, ParseResult, Parser};
|
||||||
|
|
||||||
/// Synchronization groups the parser can stop at during recovery.
|
/// Synchronization groups the parser can stop at during recovery.
|
||||||
///
|
///
|
||||||
/// Stronger levels subsume weaker ones. The enum's variant order defines this
|
/// The variant order defines recovery strength: later variants are treated as
|
||||||
/// ordering of strength via [`Ord`]; changing it changes recovery behavior.
|
/// "stronger" boundaries, so synchronizing to a weaker level will also stop
|
||||||
|
/// at any stronger one.
|
||||||
|
///
|
||||||
|
/// This enum is intentionally coarse-grained and semantic. It is not meant to
|
||||||
|
/// encode arbitrary token sets.
|
||||||
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
|
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq, PartialOrd, Ord)]
|
||||||
pub(crate) enum SyncLevel {
|
pub enum SyncLevel {
|
||||||
/// Tokens that appear inside expressions.
|
/// Tokens that can reasonably continue or restart an expression.
|
||||||
///
|
///
|
||||||
/// Includes operators, member access `.`, ternary `? :`, an opening `(`,
|
/// This is the loosest recovery level.
|
||||||
/// and identifiers.
|
|
||||||
Expression,
|
Expression,
|
||||||
/// List separator `,`.
|
|
||||||
|
/// Separator between homogeneous list elements, e.g. `,`.
|
||||||
|
///
|
||||||
|
/// Synchronizing here also stops at closing delimiters and stronger
|
||||||
|
/// structural boundaries.
|
||||||
ListSeparator,
|
ListSeparator,
|
||||||
/// Close of a parenthesized subexpression `)`.
|
|
||||||
|
/// Closing `>` of an angle-bracket-delimited type/class argument list.
|
||||||
|
CloseAngleBracket,
|
||||||
|
|
||||||
|
/// Closing `)` of a parenthesized/grouped construct.
|
||||||
CloseParenthesis,
|
CloseParenthesis,
|
||||||
/// Close of an index or list `]`.
|
|
||||||
|
/// Closing `]` of an index or bracket-delimited construct.
|
||||||
CloseBracket,
|
CloseBracket,
|
||||||
/// Statement boundary or starter.
|
|
||||||
|
/// A statement boundary or statement starter.
|
||||||
|
///
|
||||||
|
/// Includes `;` and keywords that begin standalone statements /
|
||||||
|
/// statement-like control-flow forms.
|
||||||
Statement,
|
Statement,
|
||||||
/// Block boundary braces (both `{` and `}`).
|
|
||||||
|
/// Start of a `switch` arm.
|
||||||
|
///
|
||||||
|
/// This is useful because `case` / `default` are stronger boundaries than
|
||||||
|
/// ordinary statements inside switch parsing.
|
||||||
|
SwitchArmStart,
|
||||||
|
|
||||||
|
/// Start of a declaration-like item.
|
||||||
|
///
|
||||||
|
/// Used for recovery in declaration-containing bodies where the next
|
||||||
|
/// sensible point is "the next member/declaration" rather than merely
|
||||||
|
/// "some statement".
|
||||||
|
DeclarationStart,
|
||||||
|
|
||||||
|
/// A hard block boundary.
|
||||||
|
///
|
||||||
|
/// This is the strongest normal recovery point.
|
||||||
BlockBoundary,
|
BlockBoundary,
|
||||||
/// Start of a top-level or class-level declaration.
|
|
||||||
TopDeclaration,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SyncLevel {
|
impl SyncLevel {
|
||||||
/// Converts [`Token`] to its [`SyncLevel`], if it has one.
|
/// Converts a token to its synchronization class, if any.
|
||||||
fn for_token(token: Token) -> Option<SyncLevel> {
|
const fn for_token(token: Token) -> Option<Self> {
|
||||||
use SyncLevel::*;
|
use crate::lexer::Keyword;
|
||||||
use Token::*;
|
use SyncLevel::{
|
||||||
|
BlockBoundary, CloseAngleBracket, CloseBracket, CloseParenthesis, DeclarationStart,
|
||||||
|
Expression, ListSeparator, Statement, SwitchArmStart,
|
||||||
|
};
|
||||||
|
|
||||||
match token {
|
match token {
|
||||||
Exponentiation | Increment | Decrement | Not | BitwiseNot | Dot | Cross | Multiply
|
// Expression-level recovery points
|
||||||
| Divide | Modulo | Plus | Minus | ConcatSpace | Concat | LeftShift
|
Token::Exponentiation
|
||||||
| LogicalRightShift | RightShift | Less | LessEqual | Greater | GreaterEqual
|
| Token::Increment
|
||||||
| Equal | NotEqual | ApproximatelyEqual | ClockwiseFrom | BitwiseAnd | BitwiseOr
|
| Token::Decrement
|
||||||
| BitwiseXor | And | Xor | Or | Assign | MultiplyAssign | DivideAssign
|
| Token::Not
|
||||||
| ModuloAssign | PlusAssign | MinusAssign | ConcatAssign | ConcatSpaceAssign
|
| Token::BitwiseNot
|
||||||
| Period | Question | Colon | LeftParenthesis | Identifier => Some(Expression),
|
| Token::Multiply
|
||||||
|
| Token::Divide
|
||||||
Comma => Some(ListSeparator),
|
| Token::Modulo
|
||||||
|
| Token::Plus
|
||||||
RightParenthesis => Some(CloseParenthesis),
|
| Token::Minus
|
||||||
RightBracket => Some(CloseBracket),
|
| Token::ConcatSpace
|
||||||
|
| Token::Concat
|
||||||
Case | Default | If | Else | Switch | For | ForEach | While | Do | Return | Break
|
| Token::LeftShift
|
||||||
| Continue | Local | Semicolon => Some(Statement),
|
| Token::LogicalRightShift
|
||||||
|
| Token::RightShift
|
||||||
Brace(_) | RightBrace => Some(BlockBoundary),
|
| Token::LessEqual
|
||||||
|
| Token::GreaterEqual
|
||||||
Class | Struct | Enum | State | Function | Event | Delegate | Operator | Var
|
| Token::Equal
|
||||||
| Replication | NativeReplication | DefaultProperties | CppText | ExecDirective => {
|
| Token::NotEqual
|
||||||
Some(TopDeclaration)
|
| Token::ApproximatelyEqual
|
||||||
|
| Token::BitwiseAnd
|
||||||
|
| Token::BitwiseOr
|
||||||
|
| Token::BitwiseXor
|
||||||
|
| Token::LogicalAnd
|
||||||
|
| Token::LogicalXor
|
||||||
|
| Token::LogicalOr
|
||||||
|
| Token::Assign
|
||||||
|
| Token::MultiplyAssign
|
||||||
|
| Token::DivideAssign
|
||||||
|
| Token::ModuloAssign
|
||||||
|
| Token::PlusAssign
|
||||||
|
| Token::MinusAssign
|
||||||
|
| Token::ConcatAssign
|
||||||
|
| Token::ConcatSpaceAssign
|
||||||
|
| Token::Period
|
||||||
|
| Token::Question
|
||||||
|
| Token::Colon
|
||||||
|
| Token::LeftParenthesis
|
||||||
|
| Token::Identifier
|
||||||
|
| Token::Keyword(Keyword::Dot | Keyword::Cross | Keyword::ClockwiseFrom) => {
|
||||||
|
Some(Expression)
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => Option::None,
|
// List / delimiter boundaries
|
||||||
|
Token::Comma => Some(ListSeparator),
|
||||||
|
Token::Greater => Some(CloseAngleBracket),
|
||||||
|
Token::RightParenthesis => Some(CloseParenthesis),
|
||||||
|
Token::RightBracket => Some(CloseBracket),
|
||||||
|
|
||||||
|
// Statement-level boundaries
|
||||||
|
Token::Semicolon
|
||||||
|
| Token::Keyword(
|
||||||
|
Keyword::If
|
||||||
|
| Keyword::Else
|
||||||
|
| Keyword::Switch
|
||||||
|
| Keyword::For
|
||||||
|
| Keyword::ForEach
|
||||||
|
| Keyword::While
|
||||||
|
| Keyword::Do
|
||||||
|
| Keyword::Until
|
||||||
|
| Keyword::Return
|
||||||
|
| Keyword::Break
|
||||||
|
| Keyword::Continue
|
||||||
|
| Keyword::Local,
|
||||||
|
) => Some(Statement),
|
||||||
|
|
||||||
|
// Switch-specific stronger boundary
|
||||||
|
Token::Keyword(Keyword::Case | Keyword::Default) => Some(SwitchArmStart),
|
||||||
|
|
||||||
|
// Declaration/member starts
|
||||||
|
Token::Keyword(
|
||||||
|
Keyword::Class
|
||||||
|
| Keyword::Struct
|
||||||
|
| Keyword::Enum
|
||||||
|
| Keyword::State
|
||||||
|
| Keyword::Function
|
||||||
|
| Keyword::Event
|
||||||
|
| Keyword::Delegate
|
||||||
|
| Keyword::Operator
|
||||||
|
| Keyword::Var
|
||||||
|
| Keyword::Replication
|
||||||
|
| Keyword::NativeReplication
|
||||||
|
| Keyword::DefaultProperties
|
||||||
|
| Keyword::CppText
|
||||||
|
| Keyword::CppStruct,
|
||||||
|
)
|
||||||
|
| Token::ExecDirective => Some(DeclarationStart),
|
||||||
|
|
||||||
|
// Hard structural stop
|
||||||
|
Token::LeftBrace | Token::CppBlock | Token::RightBrace => Some(BlockBoundary),
|
||||||
|
|
||||||
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> Parser<'src, 'arena> {
|
impl Parser<'_, '_> {
|
||||||
/// Converts a parse error into a diagnostic and queues it.
|
/// Converts a parse error into a diagnostic and queues it.
|
||||||
///
|
///
|
||||||
/// Placeholder implementation.
|
/// Placeholder implementation.
|
||||||
fn handle_error(&mut self, error: ParseError) {
|
pub fn report_error(&mut self, error: ParseError) {
|
||||||
let diagnostic = crate::diagnostics::DiagnosticBuilder::error(format!(
|
self.diagnostics.push(Diagnostic::from(error));
|
||||||
"error {:?} while parsing",
|
|
||||||
error.kind
|
|
||||||
))
|
|
||||||
.primary_label(error.source_span, "happened here")
|
|
||||||
.build();
|
|
||||||
self.diagnostics.push(diagnostic);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Reports a parser error with [`crate::parser::ParseErrorKind`] at
|
/// Reports a parser error with [`crate::parser::ParseErrorKind`] at
|
||||||
/// the current location and queues an appropriate diagnostic.
|
/// the current location and queues an appropriate diagnostic.
|
||||||
pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) {
|
pub fn report_error_here(&mut self, error_kind: crate::parser::ParseErrorKind) {
|
||||||
let new_error = self.make_error_here(error_kind);
|
let new_error = self.make_error_here(error_kind);
|
||||||
self.handle_error(new_error);
|
self.report_error(new_error);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Skips tokens until a token with `min_sync` level or stronger is found.
|
/// Skips tokens until a token with `min_sync` level or stronger is found.
|
||||||
@ -111,18 +209,32 @@ impl<'src, 'arena> Parser<'src, 'arena> {
|
|||||||
|
|
||||||
/// Supplies a fallback value after a parse error so parsing can continue and
|
/// Supplies a fallback value after a parse error so parsing can continue and
|
||||||
/// reveal further errors.
|
/// reveal further errors.
|
||||||
pub(crate) trait RecoveryFallback<'src, 'arena>: Sized {
|
pub trait RecoveryFallback<'src, 'arena>: Sized {
|
||||||
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self;
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Extends [`ParseResult`] with recovery-related methods for
|
/// Extends [`ParseResult`] with recovery-related methods for
|
||||||
/// fluent error handling.
|
/// fluent error handling.
|
||||||
pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
|
/// !!!! Can we store a parser reference instead of passing it into every method?
|
||||||
|
pub trait ResultRecoveryExt<'src, 'arena, T>: Sized {
|
||||||
/// Extends the left end of the error span to `from`.
|
/// Extends the left end of the error span to `from`.
|
||||||
///
|
///
|
||||||
/// Does nothing if `Self` is `Ok(...)`.
|
/// Does nothing if `Self` is `Ok(...)`.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn widen_error_span_from(self, from: TokenLocation) -> Self;
|
fn widen_error_span_from(self, from: TokenPosition) -> Self;
|
||||||
|
|
||||||
|
fn blame(self, blame_span: AstSpan) -> Self;
|
||||||
|
fn related(self, related_span: AstSpan) -> Self;
|
||||||
|
|
||||||
|
fn blame_token(self, blame_position: TokenPosition) -> Self {
|
||||||
|
self.blame(AstSpan::new(blame_position))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extend_blame_to_covered_end(self) -> Self;
|
||||||
|
|
||||||
|
fn related_token(self, related_position: TokenPosition) -> Self {
|
||||||
|
self.related(AstSpan::new(related_position))
|
||||||
|
}
|
||||||
|
|
||||||
/// Extends the right end of the error span up to but not including
|
/// Extends the right end of the error span up to but not including
|
||||||
/// the next token of the given sync `level`.
|
/// the next token of the given sync `level`.
|
||||||
@ -140,28 +252,44 @@ pub(crate) trait ResultRecoveryExt<'src, 'arena, T>: Sized {
|
|||||||
|
|
||||||
/// Either returns expected value or its best effort fallback.
|
/// Either returns expected value or its best effort fallback.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T;
|
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
|
||||||
|
where
|
||||||
|
T: RecoveryFallback<'src, 'arena>;
|
||||||
|
|
||||||
/// Produces the contained value if successful,
|
/// Produces the contained value if successful,
|
||||||
/// or a fallback if an error occurred.
|
/// or a fallback if an error occurred.
|
||||||
fn report_error(self, parser: &mut Parser<'src, 'arena>);
|
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool;
|
||||||
|
|
||||||
|
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T>;
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T>
|
impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> {
|
||||||
where
|
fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
|
||||||
T: RecoveryFallback<'src, 'arena>,
|
|
||||||
{
|
|
||||||
fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
|
|
||||||
if let Err(ref mut error) = self {
|
if let Err(ref mut error) = self {
|
||||||
error.source_span.from = std::cmp::min(error.source_span.from, from);
|
error.covered_span.token_from = std::cmp::min(error.covered_span.token_from, from);
|
||||||
}
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn blame(self, blame_span: AstSpan) -> Self {
|
||||||
|
self.map_err(|error| error.blame(blame_span))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extend_blame_to_covered_end(self) -> Self {
|
||||||
|
self.map_err(|error| error.extend_blame_to_covered_end())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn related(self, related_span: AstSpan) -> Self {
|
||||||
|
self.map_err(|error| error.related(related_span))
|
||||||
|
}
|
||||||
|
|
||||||
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
||||||
if let Err(ref mut error) = self {
|
if let Err(ref mut error) = self {
|
||||||
parser.recover_until(level);
|
parser.recover_until(level);
|
||||||
error.source_span.to = parser.last_visited_location();
|
error.covered_span.token_to = std::cmp::max(
|
||||||
|
error.covered_span.token_to,
|
||||||
|
parser.last_consumed_position_or_start(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
@ -169,72 +297,213 @@ where
|
|||||||
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
||||||
if let Err(ref mut error) = self {
|
if let Err(ref mut error) = self {
|
||||||
parser.recover_until(level);
|
parser.recover_until(level);
|
||||||
error.source_span.to = parser.peek_location();
|
|
||||||
// If we're at end-of-file, this'll simply do nothing.
|
// If we're at end-of-file, this'll simply do nothing.
|
||||||
|
if parser
|
||||||
|
.peek_token()
|
||||||
|
.and_then(SyncLevel::for_token)
|
||||||
|
.is_some_and(|next_level| next_level == level)
|
||||||
|
{
|
||||||
parser.advance();
|
parser.advance();
|
||||||
}
|
}
|
||||||
|
error.covered_span.token_to = parser.last_consumed_position_or_start(); // need to be peek
|
||||||
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T {
|
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> T
|
||||||
|
where
|
||||||
|
T: RecoveryFallback<'src, 'arena>,
|
||||||
|
{
|
||||||
self.unwrap_or_else(|error| {
|
self.unwrap_or_else(|error| {
|
||||||
let value = T::fallback_value(parser, &error);
|
let value = T::fallback_value(parser, &error);
|
||||||
parser.handle_error(error);
|
parser.report_error(error);
|
||||||
value
|
value
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn report_error(self, parser: &mut Parser<'src, 'arena>) {
|
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
|
||||||
if let Err(error) = self {
|
if let Err(error) = self {
|
||||||
parser.handle_error(error);
|
parser.report_error(error);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<T> {
|
||||||
|
match self {
|
||||||
|
Ok(value) => Some(value),
|
||||||
|
Err(error) => {
|
||||||
|
parser.report_error(error);
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
|
impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError {
|
||||||
fn widen_error_span_from(mut self, from: TokenLocation) -> Self {
|
fn widen_error_span_from(mut self, from: TokenPosition) -> Self {
|
||||||
self.source_span.from = std::cmp::min(self.source_span.from, from);
|
self.covered_span.token_from = std::cmp::min(self.covered_span.token_from, from);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn blame(mut self, blame_span: AstSpan) -> Self {
|
||||||
|
self.blame_span = blame_span;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn extend_blame_to_covered_end(mut self) -> Self {
|
||||||
|
self.blame_span.token_to = self.covered_span.token_to;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
fn related(mut self, related_span: AstSpan) -> Self {
|
||||||
|
self.related_span = Some(related_span);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
||||||
parser.recover_until(level);
|
parser.recover_until(level);
|
||||||
self.source_span.to = parser.last_visited_location();
|
self.covered_span.token_to = parser.last_consumed_position_or_start();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self {
|
||||||
parser.recover_until(level);
|
parser.recover_until(level);
|
||||||
self.source_span.to = parser.peek_location();
|
|
||||||
// If we're at end-of-file, this'll simply do nothing.
|
// If we're at end-of-file, this'll simply do nothing.
|
||||||
parser.advance();
|
parser.advance();
|
||||||
|
self.covered_span.token_to = parser.last_consumed_position_or_start();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) -> () {
|
fn unwrap_or_fallback(self, parser: &mut Parser<'src, 'arena>) {
|
||||||
parser.handle_error(self);
|
parser.report_error(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn report_error(self, parser: &mut Parser<'src, 'arena>) {
|
fn report_error(self, parser: &mut Parser<'src, 'arena>) -> bool {
|
||||||
parser.handle_error(self);
|
parser.report_error(self);
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ok_or_report(self, parser: &mut Parser<'src, 'arena>) -> Option<()> {
|
||||||
|
parser.report_error(self);
|
||||||
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for i128 {
|
||||||
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||||
i128::default()
|
Self::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 {
|
||||||
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||||
f64::default()
|
Self::default()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenLocation {
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::IdentifierToken {
|
||||||
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
error.source_span.to
|
Self(error.covered_span.token_from)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
|
||||||
|
for crate::ast::CallableDefinitionRef<'src, 'arena>
|
||||||
|
{
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
// default return type: Named("") at error span
|
||||||
|
let ret_id = crate::ast::IdentifierToken(err.covered_span.token_from);
|
||||||
|
let return_type = crate::arena::ArenaNode::new_in(
|
||||||
|
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, ret_id)),
|
||||||
|
err.covered_span,
|
||||||
|
parser.arena,
|
||||||
|
);
|
||||||
|
|
||||||
|
let def = crate::ast::CallableDefinition {
|
||||||
|
name: crate::ast::CallableName::Identifier(IdentifierToken(
|
||||||
|
err.covered_span.token_from,
|
||||||
|
)),
|
||||||
|
kind: CallableKind::Function,
|
||||||
|
return_type_specifier: Some(return_type),
|
||||||
|
modifiers: parser.arena.vec(),
|
||||||
|
parameters: parser.arena.vec(),
|
||||||
|
body: None,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StructDefRef<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let def = crate::ast::StructDefinition {
|
||||||
|
name: None,
|
||||||
|
base_type_name: None,
|
||||||
|
modifiers: parser.arena.vec(),
|
||||||
|
fields: parser.arena.vec(),
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassVarDeclRef<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let dummy_ident = crate::ast::IdentifierToken(err.covered_span.token_from);
|
||||||
|
let type_spec = crate::arena::ArenaNode::new_in(
|
||||||
|
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(
|
||||||
|
parser.arena,
|
||||||
|
dummy_ident,
|
||||||
|
)),
|
||||||
|
err.covered_span,
|
||||||
|
parser.arena,
|
||||||
|
);
|
||||||
|
let def = crate::ast::ClassVarDecl {
|
||||||
|
paren_specs: None,
|
||||||
|
modifiers: parser.arena.vec(),
|
||||||
|
type_spec,
|
||||||
|
declarators: parser.arena.vec(),
|
||||||
|
span: err.covered_span,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena>
|
||||||
|
for crate::ast::ReplicationBlockRef<'src, 'arena>
|
||||||
|
{
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let def = crate::ast::ReplicationBlock {
|
||||||
|
rules: parser.arena.vec(),
|
||||||
|
span: err.covered_span,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let def = crate::ast::StateDecl {
|
||||||
|
name: crate::ast::IdentifierToken(err.covered_span.token_from),
|
||||||
|
parent: None,
|
||||||
|
modifiers: parser.arena.vec(),
|
||||||
|
ignores: None,
|
||||||
|
body: parser.arena.vec(),
|
||||||
|
span: err.covered_span,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenPosition {
|
||||||
|
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
|
error.covered_span.token_to
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for (Token, TokenPosition) {
|
||||||
|
fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
|
(Token::Error, error.covered_span.token_to)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,7 +511,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExpressionRef<
|
|||||||
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
crate::arena::ArenaNode::new_in(
|
crate::arena::ArenaNode::new_in(
|
||||||
crate::ast::Expression::Error,
|
crate::ast::Expression::Error,
|
||||||
error.source_span,
|
error.covered_span,
|
||||||
parser.arena,
|
parser.arena,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -252,17 +521,51 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StatementRef<'
|
|||||||
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
||||||
crate::arena::ArenaNode::new_in(
|
crate::arena::ArenaNode::new_in(
|
||||||
crate::ast::Statement::Error,
|
crate::ast::Statement::Error,
|
||||||
error.source_span,
|
error.covered_span,
|
||||||
parser.arena,
|
parser.arena,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T>
|
impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option<T> {
|
||||||
where
|
fn fallback_value(_: &Parser<'src, 'arena>, _: &ParseError) -> Self {
|
||||||
T: RecoveryFallback<'src, 'arena>,
|
None
|
||||||
{
|
}
|
||||||
fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self {
|
}
|
||||||
Some(T::fallback_value(parser, error))
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDeclRef<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let name = crate::ast::IdentifierToken(err.covered_span.token_from);
|
||||||
|
let value = crate::ast::DeclarationLiteralRef {
|
||||||
|
literal: crate::ast::DeclarationLiteral::None,
|
||||||
|
position: err.covered_span.token_from,
|
||||||
|
};
|
||||||
|
let def = crate::ast::ClassConstDecl {
|
||||||
|
name,
|
||||||
|
value,
|
||||||
|
span: err.covered_span,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::TypeSpecifierRef<'src, 'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let dummy = crate::ast::IdentifierToken(err.covered_span.token_from);
|
||||||
|
crate::arena::ArenaNode::new_in(
|
||||||
|
crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, dummy)),
|
||||||
|
err.covered_span,
|
||||||
|
parser.arena,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ExecDirectiveRef<'arena> {
|
||||||
|
fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self {
|
||||||
|
let def = crate::ast::ExecDirective {
|
||||||
|
text: parser.arena.string(""),
|
||||||
|
span: err.covered_span,
|
||||||
|
};
|
||||||
|
crate::arena::ArenaNode::new_in(def, err.covered_span, parser.arena)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
//! This module provides trivia token collection mechanism that lets parser code
|
//! Records trivia separately from significant tokens so parser code can work
|
||||||
//! iterate over significant tokens while ignoring trivia and preserving
|
//! with significant tokens without losing comments, whitespace, or line
|
||||||
//! full information for linting, formatting, and documentation.
|
//! structure.
|
||||||
//!
|
//!
|
||||||
//! Tokens considered *trivia* are:
|
//! Tokens considered *trivia* are:
|
||||||
//!
|
//!
|
||||||
@ -10,13 +10,27 @@
|
|||||||
//! 4. [`crate::lexer::Token::Whitespace`].
|
//! 4. [`crate::lexer::Token::Whitespace`].
|
||||||
//!
|
//!
|
||||||
//! Every other token is considered *significant*.
|
//! Every other token is considered *significant*.
|
||||||
|
//!
|
||||||
|
//! ## Required usage
|
||||||
|
//!
|
||||||
|
//! This is an internal helper. Callers must follow the protocol below.
|
||||||
|
//!
|
||||||
|
//! [`TriviaIndexBuilder`] must be driven over a single token stream in
|
||||||
|
//! strictly increasing [`TokenPosition`] order.
|
||||||
|
//! Call [`TriviaIndexBuilder::record_trivia`] for each trivia token in source
|
||||||
|
//! order, and call [`TriviaIndexBuilder::record_significant_token`] for each
|
||||||
|
//! significant token.
|
||||||
|
//!
|
||||||
|
//! After the last significant token has been processed, call
|
||||||
|
//! [`TriviaIndexBuilder::into_index`] to attach any trailing trivia.
|
||||||
|
//!
|
||||||
|
//! Violating this protocol is a logic error.
|
||||||
|
|
||||||
use crate::lexer::TokenLocation;
|
use crate::lexer::TokenPosition;
|
||||||
|
|
||||||
/// Types of trivia tokens, corresponding directly to the matching variants of
|
/// Kinds of trivia tokens corresponding to variants of [`crate::lexer::Token`].
|
||||||
/// [`crate::lexer::Token`].
|
|
||||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
pub(crate) enum TriviaKind {
|
pub enum TriviaKind {
|
||||||
Whitespace,
|
Whitespace,
|
||||||
Newline,
|
Newline,
|
||||||
LineComment,
|
LineComment,
|
||||||
@ -29,269 +43,215 @@ impl std::convert::TryFrom<crate::lexer::Token> for TriviaKind {
|
|||||||
fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
|
fn try_from(token: crate::lexer::Token) -> Result<Self, Self::Error> {
|
||||||
use crate::lexer::Token;
|
use crate::lexer::Token;
|
||||||
match token {
|
match token {
|
||||||
Token::Whitespace => Ok(TriviaKind::Whitespace),
|
Token::Whitespace => Ok(Self::Whitespace),
|
||||||
Token::Newline => Ok(TriviaKind::Newline),
|
Token::Newline => Ok(Self::Newline),
|
||||||
Token::LineComment => Ok(TriviaKind::LineComment),
|
Token::LineComment => Ok(Self::LineComment),
|
||||||
Token::BlockComment => Ok(TriviaKind::BlockComment),
|
Token::BlockComment => Ok(Self::BlockComment),
|
||||||
_ => Err(()),
|
_ => Err(()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Complete description of a trivia token.
|
/// A recorded trivia token.
|
||||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
|
||||||
pub(crate) struct TriviaToken<'src> {
|
pub struct TriviaToken<'src> {
|
||||||
/// Specific type of the trivia.
|
/// Kind of trivia token.
|
||||||
pub kind: TriviaKind,
|
pub kind: TriviaKind,
|
||||||
/// Actual content of the token.
|
/// Source text of the token.
|
||||||
pub text: &'src str,
|
pub text: &'src str,
|
||||||
/// Location of this trivia token in the token stream.
|
/// Location of this trivia token in the token stream.
|
||||||
pub location: TokenLocation,
|
pub position: TokenPosition,
|
||||||
}
|
}
|
||||||
|
|
||||||
type TriviaRange = std::ops::Range<usize>;
|
type TriviaRangeMap = std::collections::HashMap<BoundaryLocation, std::ops::Range<usize>>;
|
||||||
type TriviaMap = std::collections::HashMap<TriviaLocation, TriviaRange>;
|
|
||||||
|
|
||||||
/// Immutable index over all recorded trivia.
|
/// Extends [`TokenPosition`] with start-of-file and end-of-file markers.
|
||||||
///
|
///
|
||||||
/// Enables O(1) access to trivia immediately before/after any significant
|
/// Regular [`TokenPosition`] values are enough for significant tokens, but
|
||||||
/// token, plus file-leading and file-trailing trivia. Returned slices alias
|
/// trivia also needs to represent content before the first significant token
|
||||||
/// internal storage and live for `'src`.
|
/// and after the last one.
|
||||||
#[derive(Clone, Debug, Default)]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) struct TriviaIndex<'src> {
|
|
||||||
/// All trivia tokens, stored contiguously in file order.
|
|
||||||
tokens: Vec<TriviaToken<'src>>,
|
|
||||||
/// Maps token location to the trivia tokens stored right after it.
|
|
||||||
after_map: TriviaMap,
|
|
||||||
/// Maps token location to the trivia tokens stored right before it.
|
|
||||||
before_map: TriviaMap,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Extends [`TokenLocation`] with *start of file* value.
|
|
||||||
///
|
|
||||||
/// Regular [`TokenLocation`] does not need this value, but trivia requires
|
|
||||||
/// a way to express "trivia before any significant token".
|
|
||||||
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
enum TriviaLocation {
|
enum BoundaryLocation {
|
||||||
/// Position before any tokens, trivia or otherwise.
|
|
||||||
StartOfFile,
|
StartOfFile,
|
||||||
/// This variant can also express "end of file" through
|
Token(TokenPosition),
|
||||||
/// [`TokenLocation::EndOfFile`].
|
EndOfFile,
|
||||||
At(TokenLocation),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mutable builder for `TriviaIndex`.
|
/// Immutable index over recorded trivia.
|
||||||
///
|
///
|
||||||
/// Used inside the parser to record trivia between successive significant
|
/// Provides O(1) access to trivia immediately before or after any significant
|
||||||
/// tokens in file order, then frozen via `into_index`.
|
/// token, as well as file-leading and file-trailing trivia. Returned slices
|
||||||
#[derive(Debug, Default)]
|
/// borrow the index, and the contained token texts live for `'src`.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq, Default)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub(crate) struct TriviaComponent<'src> {
|
pub struct TriviaIndex<'src> {
|
||||||
/// All trivia tokens, stored contiguously in file order.
|
/// All trivia tokens, stored contiguously in file order.
|
||||||
tokens: Vec<TriviaToken<'src>>,
|
tokens: Vec<TriviaToken<'src>>,
|
||||||
/// Maps token location to the trivia tokens stored right after it.
|
/// Maps a trivia boundary location to the trivia tokens stored right
|
||||||
after_map: TriviaMap,
|
/// after it.
|
||||||
/// Maps token location to the trivia tokens stored right before it.
|
trivia_after_boundary: TriviaRangeMap,
|
||||||
before_map: TriviaMap,
|
/// Maps a trivia boundary location to the trivia tokens stored right
|
||||||
/// Location of the last gap's right boundary,
|
/// before it.
|
||||||
/// for debug-time invariant checks.
|
trivia_before_boundary: TriviaRangeMap,
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
last_right_boundary: Option<TriviaLocation>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> TriviaComponent<'src> {
|
/// Mutable builder for [`TriviaIndex`].
|
||||||
/// Records trivia tokens that lie strictly between
|
///
|
||||||
/// `previous_token_location` and `next_token_location`.
|
/// Records trivia between successive significant tokens while the caller walks
|
||||||
///
|
/// a token stream in file order. Once all tokens have been processed, call
|
||||||
/// [`None`] for `previous_token_location` means beginning of file;
|
/// [`TriviaIndexBuilder::into_index`] to finalize the index.
|
||||||
/// `next_token_location` may be [`TokenLocation::EndOfFile`].
|
#[derive(Debug)]
|
||||||
///
|
#[allow(dead_code)]
|
||||||
/// Empties `gap_trivia` without changing its capacity.
|
pub struct TriviaIndexBuilder<'src> {
|
||||||
///
|
/// All trivia tokens, stored contiguously in file order.
|
||||||
/// Requirements (checked in debug builds):
|
tokens: Vec<TriviaToken<'src>>,
|
||||||
/// - previous_token_location < next_token_location;
|
/// Maps boundary location to the trivia tokens stored right after it.
|
||||||
/// - calls are monotonic: each gap starts at or after the last end;
|
trivia_after_boundary: TriviaRangeMap,
|
||||||
/// - `collected` is nonempty and strictly ordered by `location`;
|
/// Maps boundary location to the trivia tokens stored right before it.
|
||||||
/// - all `collected` lie strictly inside (prev, next).
|
trivia_before_boundary: TriviaRangeMap,
|
||||||
pub(crate) fn record_between_locations(
|
|
||||||
&mut self,
|
|
||||||
previous_token_location: Option<TokenLocation>,
|
|
||||||
next_token_location: TokenLocation,
|
|
||||||
gap_trivia: &mut Vec<TriviaToken<'src>>,
|
|
||||||
) {
|
|
||||||
#[cfg(debug_assertions)]
|
|
||||||
self.debug_assert_valid_recording_batch(
|
|
||||||
previous_token_location,
|
|
||||||
next_token_location,
|
|
||||||
&gap_trivia,
|
|
||||||
);
|
|
||||||
|
|
||||||
if gap_trivia.is_empty() {
|
/// Trivia collected since the last significant token (or file start),
|
||||||
|
/// not yet attached to a right boundary.
|
||||||
|
pending_trivia: Vec<TriviaToken<'src>>,
|
||||||
|
|
||||||
|
/// Left boundary of the currently open gap.
|
||||||
|
current_left_boundary: BoundaryLocation,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for TriviaIndexBuilder<'_> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
tokens: Vec::new(),
|
||||||
|
trivia_after_boundary: TriviaRangeMap::default(),
|
||||||
|
trivia_before_boundary: TriviaRangeMap::default(),
|
||||||
|
pending_trivia: Vec::new(),
|
||||||
|
current_left_boundary: BoundaryLocation::StartOfFile,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src> TriviaIndexBuilder<'src> {
|
||||||
|
/// Records `token` as trivia.
|
||||||
|
///
|
||||||
|
/// Tokens must be recorded in file order.
|
||||||
|
pub(crate) fn record_trivia(&mut self, token: TriviaToken<'src>) {
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
self.debug_assert_position_is_in_order(token.position);
|
||||||
|
|
||||||
|
self.pending_trivia.push(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Records a significant token at `position`.
|
||||||
|
///
|
||||||
|
/// Positions must be recorded in file order.
|
||||||
|
pub(crate) fn record_significant_token(&mut self, position: TokenPosition) {
|
||||||
|
let right_boundary = BoundaryLocation::Token(position);
|
||||||
|
|
||||||
|
#[cfg(debug_assertions)]
|
||||||
|
self.debug_assert_position_is_in_order(position);
|
||||||
|
|
||||||
|
self.flush_pending_trivia_to_boundary(right_boundary);
|
||||||
|
self.current_left_boundary = right_boundary;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores one trivia range under both neighboring boundaries so lookups
|
||||||
|
// from either side return the same slice.
|
||||||
|
fn flush_pending_trivia_to_boundary(&mut self, right_boundary: BoundaryLocation) {
|
||||||
|
if self.pending_trivia.is_empty() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
let previous_token_location = previous_token_location
|
|
||||||
.map(TriviaLocation::At)
|
|
||||||
.unwrap_or(TriviaLocation::StartOfFile);
|
|
||||||
let next_token_location = TriviaLocation::At(next_token_location);
|
|
||||||
|
|
||||||
let trivia_start = self.tokens.len();
|
let trivia_start = self.tokens.len();
|
||||||
self.tokens.append(gap_trivia);
|
self.tokens.append(&mut self.pending_trivia);
|
||||||
let trivia_end = self.tokens.len();
|
let trivia_end = self.tokens.len();
|
||||||
|
self.trivia_after_boundary
|
||||||
self.after_map
|
.insert(self.current_left_boundary, trivia_start..trivia_end);
|
||||||
.insert(previous_token_location, trivia_start..trivia_end);
|
self.trivia_before_boundary
|
||||||
self.before_map
|
.insert(right_boundary, trivia_start..trivia_end);
|
||||||
.insert(next_token_location, trivia_start..trivia_end);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Freezes into an immutable, shareable index.
|
/// Finalizes the builder and returns the completed trivia index.
|
||||||
|
///
|
||||||
|
/// Any pending trivia is recorded as trailing trivia.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub(crate) fn into_index(self) -> TriviaIndex<'src> {
|
pub(crate) fn into_index(mut self) -> TriviaIndex<'src> {
|
||||||
|
self.flush_pending_trivia_to_boundary(BoundaryLocation::EndOfFile);
|
||||||
|
|
||||||
TriviaIndex {
|
TriviaIndex {
|
||||||
tokens: self.tokens,
|
tokens: self.tokens,
|
||||||
after_map: self.after_map,
|
trivia_after_boundary: self.trivia_after_boundary,
|
||||||
before_map: self.before_map,
|
trivia_before_boundary: self.trivia_before_boundary,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trivia immediately after the significant token at `location`.
|
// Catches out-of-order recording during development; the builder relies
|
||||||
///
|
// on this ordering invariant.
|
||||||
/// Returns an empty slice if `location` is not pointing at
|
|
||||||
/// a significant token or if no trivia was recorded after it.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(TriviaLocation::At(location), &self.after_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia immediately before the significant token at `location`.
|
|
||||||
///
|
|
||||||
/// Returns an empty slice if `location` is not pointing at
|
|
||||||
/// a significant token or if no trivia was recorded before it.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(TriviaLocation::At(location), &self.before_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia before any significant token.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia after the last significant token.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(
|
|
||||||
TriviaLocation::At(TokenLocation::EndOfFile),
|
|
||||||
&self.before_map,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper: return the recorded slice or an empty slice if none.
|
|
||||||
#[track_caller]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
|
|
||||||
if let Some(range) = map.get(&key) {
|
|
||||||
// Ranges are guaranteed to be valid by construction
|
|
||||||
&self.tokens[range.start..range.end]
|
|
||||||
} else {
|
|
||||||
&[]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Debug-only validation for `record_between_locations`'s contract.
|
|
||||||
#[cfg(debug_assertions)]
|
#[cfg(debug_assertions)]
|
||||||
fn debug_assert_valid_recording_batch(
|
fn debug_assert_position_is_in_order(&self, position: TokenPosition) {
|
||||||
&mut self,
|
let location = BoundaryLocation::Token(position);
|
||||||
previous_token_location: Option<TokenLocation>,
|
debug_assert!(location > self.current_left_boundary);
|
||||||
next_token_location: TokenLocation,
|
if let Some(last) = self.pending_trivia.last() {
|
||||||
collected: &[TriviaToken<'src>],
|
debug_assert!(last.position < position);
|
||||||
) {
|
|
||||||
// Prevent zero-width or reversed gaps
|
|
||||||
debug_assert!(previous_token_location < Some(next_token_location));
|
|
||||||
let previous_token_location = previous_token_location
|
|
||||||
.map(TriviaLocation::At)
|
|
||||||
.unwrap_or(TriviaLocation::StartOfFile);
|
|
||||||
let next_token_location = TriviaLocation::At(next_token_location);
|
|
||||||
// Enforce monotonic gaps: we record in file order
|
|
||||||
if let Some(last_right) = self.last_right_boundary {
|
|
||||||
debug_assert!(previous_token_location >= last_right);
|
|
||||||
}
|
}
|
||||||
self.last_right_boundary = Some(next_token_location);
|
|
||||||
let first_trivia_location = collected
|
|
||||||
.first()
|
|
||||||
.map(|token| TriviaLocation::At(token.location))
|
|
||||||
.expect("Provided trivia tokens array should not be empty.");
|
|
||||||
let last_trivia_location = collected
|
|
||||||
.last()
|
|
||||||
.map(|token| TriviaLocation::At(token.location))
|
|
||||||
.expect("Provided trivia tokens array should not be empty.");
|
|
||||||
// Ensure trivia lies strictly inside the gap
|
|
||||||
debug_assert!(previous_token_location < first_trivia_location);
|
|
||||||
debug_assert!(next_token_location > last_trivia_location);
|
|
||||||
// Ensure trivia locations are strictly increasing
|
|
||||||
debug_assert!(
|
|
||||||
collected
|
|
||||||
.windows(2)
|
|
||||||
.all(|window| window[0].location < window[1].location)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> TriviaIndex<'src> {
|
impl<'src> TriviaIndex<'src> {
|
||||||
/// Trivia immediately after the significant token at `location`.
|
/// Returns the trivia immediately after the significant token at
|
||||||
|
/// `position`.
|
||||||
///
|
///
|
||||||
/// Returns an empty slice if `location` is not pointing at
|
/// Returns an empty slice if `position` does not identify a recorded
|
||||||
/// a significant token or if no trivia was recorded after it.
|
/// significant token or if no trivia was recorded after it.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub(crate) fn after_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
|
pub(crate) fn trivia_after_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
|
||||||
self.slice_for(TriviaLocation::At(location), &self.after_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia immediately before the significant token at `location`.
|
|
||||||
///
|
|
||||||
/// Returns an empty slice if `location` is not pointing at
|
|
||||||
/// a significant token or if no trivia was recorded before it.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn before_token(&self, location: TokenLocation) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(TriviaLocation::At(location), &self.before_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia before any significant token.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn after_file_start(&self) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(TriviaLocation::StartOfFile, &self.after_map)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Trivia after the last significant token.
|
|
||||||
#[must_use]
|
|
||||||
#[allow(dead_code)]
|
|
||||||
pub(crate) fn before_file_end(&self) -> &[TriviaToken<'src>] {
|
|
||||||
self.slice_for(
|
self.slice_for(
|
||||||
TriviaLocation::At(TokenLocation::EndOfFile),
|
BoundaryLocation::Token(position),
|
||||||
&self.before_map,
|
&self.trivia_after_boundary,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Helper: return the recorded slice or an empty slice if none.
|
/// Returns the trivia immediately before the significant token at `position`.
|
||||||
#[track_caller]
|
///
|
||||||
|
/// Returns an empty slice if `position` does not identify a recorded
|
||||||
|
/// significant token or if no trivia was recorded before it.
|
||||||
|
#[must_use]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn slice_for(&self, key: TriviaLocation, map: &TriviaMap) -> &[TriviaToken<'src>] {
|
pub(crate) fn trivia_before_token(&self, position: TokenPosition) -> &[TriviaToken<'src>] {
|
||||||
if let Some(range) = map.get(&key) {
|
self.slice_for(
|
||||||
|
BoundaryLocation::Token(position),
|
||||||
|
&self.trivia_before_boundary,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the trivia before the first significant token.
|
||||||
|
///
|
||||||
|
/// If no significant tokens were recorded, returns all recorded trivia.
|
||||||
|
#[must_use]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub(crate) fn leading_trivia(&self) -> &[TriviaToken<'src>] {
|
||||||
|
self.slice_for(BoundaryLocation::StartOfFile, &self.trivia_after_boundary)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the trivia after the last significant token.
|
||||||
|
///
|
||||||
|
/// If no significant tokens were recorded, returns all recorded trivia.
|
||||||
|
#[must_use]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub(crate) fn trailing_trivia(&self) -> &[TriviaToken<'src>] {
|
||||||
|
self.slice_for(BoundaryLocation::EndOfFile, &self.trivia_before_boundary)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
fn slice_for(&self, key: BoundaryLocation, map: &TriviaRangeMap) -> &[TriviaToken<'src>] {
|
||||||
|
match map.get(&key) {
|
||||||
|
Some(range) => {
|
||||||
// Ranges are guaranteed to be valid by construction
|
// Ranges are guaranteed to be valid by construction
|
||||||
&self.tokens[range.start..range.end]
|
debug_assert!(range.start <= range.end);
|
||||||
} else {
|
debug_assert!(range.end <= self.tokens.len());
|
||||||
&[]
|
self.tokens.get(range.clone()).unwrap_or(&[])
|
||||||
|
}
|
||||||
|
None => &[],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
63
rottlib/tests/common.rs
Normal file
63
rottlib/tests/common.rs
Normal file
@ -0,0 +1,63 @@
|
|||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use rottlib::lexer::{Token, TokenData, TokenPosition, TokenizedFile};
|
||||||
|
|
||||||
|
pub fn fixture_path(name: &str) -> PathBuf {
|
||||||
|
Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||||
|
.join("tests")
|
||||||
|
.join("fixtures")
|
||||||
|
.join(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn read_fixture(name: &str) -> String {
|
||||||
|
let path = fixture_path(name);
|
||||||
|
std::fs::read_to_string(&path)
|
||||||
|
.unwrap_or_else(|e| panic!("failed to read fixture {}: {e}", path.display()))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_fixture(name: &str, f: impl for<'src> FnOnce(&'src str, TokenizedFile<'src>)) {
|
||||||
|
let source = read_fixture(name);
|
||||||
|
let file = TokenizedFile::tokenize(&source);
|
||||||
|
f(&source, file);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn line_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>, line: usize) -> Vec<&'src str> {
|
||||||
|
file.line_tokens(line).map(|(_, t)| t.lexeme).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn line_tokens<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec<Token> {
|
||||||
|
file.line_tokens(line).map(|(_, t)| t.token).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn line_positions<'src>(file: &TokenizedFile<'src>, line: usize) -> Vec<TokenPosition> {
|
||||||
|
file.line_tokens(line).map(|(pos, _)| pos).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn line_pairs<'file, 'src>(
|
||||||
|
file: &'file TokenizedFile<'src>,
|
||||||
|
line: usize,
|
||||||
|
) -> Vec<(Token, &'src str)> {
|
||||||
|
file.line_tokens(line)
|
||||||
|
.map(|(_, t)| (t.token, t.lexeme))
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn all_lexemes<'file, 'src>(file: &'file TokenizedFile<'src>) -> Vec<&'src str> {
|
||||||
|
file.iter().map(|(_, t)| t.lexeme).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn all_tokens<'src>(file: &TokenizedFile<'src>) -> Vec<Token> {
|
||||||
|
file.iter().map(|(_, t)| t.token).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn token_at<'src>(file: &TokenizedFile<'src>, index: usize) -> Option<TokenData<'src>> {
|
||||||
|
file.token_at(TokenPosition(index))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reconstruct_source<'file, 'src>(file: &'file TokenizedFile<'src>) -> String {
|
||||||
|
file.iter().map(|(_, t)| t.lexeme).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_line<'src>(file: &TokenizedFile<'src>, needle: &str) -> Option<usize> {
|
||||||
|
(0..file.line_count()).find(|&line| file.line_text(line).as_deref() == Some(needle))
|
||||||
|
}
|
||||||
1578
rottlib/tests/fixtures/CommandAPI.uc
vendored
Normal file
1578
rottlib/tests/fixtures/CommandAPI.uc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
1199
rottlib/tests/fixtures/DBRecord.uc
vendored
Normal file
1199
rottlib/tests/fixtures/DBRecord.uc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
326
rottlib/tests/fixtures/KVehicle.uc
vendored
Normal file
326
rottlib/tests/fixtures/KVehicle.uc
vendored
Normal file
@ -0,0 +1,326 @@
|
|||||||
|
// Generic 'Karma Vehicle' base class that can be controlled by a Pawn.
|
||||||
|
|
||||||
|
class KVehicle extends Vehicle
|
||||||
|
native
|
||||||
|
abstract;
|
||||||
|
|
||||||
|
cpptext
|
||||||
|
{
|
||||||
|
#ifdef WITH_KARMA
|
||||||
|
virtual void PostNetReceive();
|
||||||
|
virtual void PostEditChange();
|
||||||
|
virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);
|
||||||
|
virtual void TickSimulated( FLOAT DeltaSeconds );
|
||||||
|
virtual void TickAuthoritative( FLOAT DeltaSeconds );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Effect spawned when vehicle is destroyed
|
||||||
|
var (KVehicle) class<Actor> DestroyEffectClass;
|
||||||
|
|
||||||
|
// Simple 'driving-in-rings' logic.
|
||||||
|
var (KVehicle) bool bAutoDrive;
|
||||||
|
|
||||||
|
// The factory that created this vehicle.
|
||||||
|
//var KVehicleFactory ParentFactory;
|
||||||
|
|
||||||
|
// Weapon system
|
||||||
|
var bool bVehicleIsFiring, bVehicleIsAltFiring;
|
||||||
|
|
||||||
|
const FilterFrames = 5;
|
||||||
|
var vector CameraHistory[FilterFrames];
|
||||||
|
var int NextHistorySlot;
|
||||||
|
var bool bHistoryWarmup;
|
||||||
|
|
||||||
|
// Useful function for plotting data to real-time graph on screen.
|
||||||
|
native final function GraphData(string DataName, float DataValue);
|
||||||
|
|
||||||
|
// if _RO_
|
||||||
|
function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
|
||||||
|
vector momentum, class<DamageType> damageType, optional int HitIndex)
|
||||||
|
// else UT
|
||||||
|
//function TakeDamage(int Damage, Pawn instigatedBy, Vector hitlocation,
|
||||||
|
// vector momentum, class<DamageType> damageType)
|
||||||
|
{
|
||||||
|
Super.TakeDamage(Damage,instigatedBy,HitLocation,Momentum,DamageType);
|
||||||
|
}
|
||||||
|
|
||||||
|
// You got some new info from the server (ie. VehicleState has some new info).
|
||||||
|
event VehicleStateReceived();
|
||||||
|
|
||||||
|
// Called when a parameter of the overall articulated actor has changed (like PostEditChange)
|
||||||
|
// The script must then call KUpdateConstraintParams or Actor Karma mutators as appropriate.
|
||||||
|
simulated event KVehicleUpdateParams();
|
||||||
|
|
||||||
|
// The pawn Driver has tried to take control of this vehicle
|
||||||
|
function bool TryToDrive(Pawn P)
|
||||||
|
{
|
||||||
|
if ( P.bIsCrouched || (P.Controller == None) || (Driver != None) || !P.Controller.bIsPlayer )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if ( !P.IsHumanControlled() || !P.Controller.IsInState('PlayerDriving') )
|
||||||
|
{
|
||||||
|
KDriverEnter(P);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Events called on driver entering/leaving vehicle
|
||||||
|
|
||||||
|
simulated function ClientKDriverEnter(PlayerController pc)
|
||||||
|
{
|
||||||
|
pc.myHUD.bCrosshairShow = false;
|
||||||
|
pc.myHUD.bShowWeaponInfo = false;
|
||||||
|
pc.myHUD.bShowPoints = false;
|
||||||
|
|
||||||
|
pc.bBehindView = true;
|
||||||
|
pc.bFreeCamera = true;
|
||||||
|
|
||||||
|
pc.SetRotation(rotator( vect(-1, 0, 0) >> Rotation ));
|
||||||
|
}
|
||||||
|
|
||||||
|
function KDriverEnter(Pawn P)
|
||||||
|
{
|
||||||
|
local PlayerController PC;
|
||||||
|
local Controller C;
|
||||||
|
|
||||||
|
// Set pawns current controller to control the vehicle pawn instead
|
||||||
|
Driver = P;
|
||||||
|
|
||||||
|
// Move the driver into position, and attach to car.
|
||||||
|
Driver.SetCollision(false, false);
|
||||||
|
Driver.bCollideWorld = false;
|
||||||
|
Driver.bPhysicsAnimUpdate = false;
|
||||||
|
Driver.Velocity = vect(0,0,0);
|
||||||
|
Driver.SetPhysics(PHYS_None);
|
||||||
|
Driver.SetBase(self);
|
||||||
|
|
||||||
|
// Disconnect PlayerController from Driver and connect to KVehicle.
|
||||||
|
C = P.Controller;
|
||||||
|
p.Controller.Unpossess();
|
||||||
|
Driver.SetOwner(C); // This keeps the driver relevant.
|
||||||
|
C.Possess(self);
|
||||||
|
|
||||||
|
PC = PlayerController(C);
|
||||||
|
if ( PC != None )
|
||||||
|
{
|
||||||
|
PC.ClientSetViewTarget(self); // Set playercontroller to view the vehicle
|
||||||
|
|
||||||
|
// Change controller state to driver
|
||||||
|
PC.GotoState('PlayerDriving');
|
||||||
|
|
||||||
|
ClientKDriverEnter(PC);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
simulated function ClientKDriverLeave(PlayerController pc)
|
||||||
|
{
|
||||||
|
pc.bBehindView = false;
|
||||||
|
pc.bFreeCamera = false;
|
||||||
|
// This removes any 'roll' from the look direction.
|
||||||
|
//exitLookDir = Vector(pc.Rotation);
|
||||||
|
//pc.SetRotation(Rotator(exitLookDir));
|
||||||
|
|
||||||
|
pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;
|
||||||
|
pc.myHUD.bShowWeaponInfo = pc.myHUD.default.bShowWeaponInfo;
|
||||||
|
pc.myHUD.bShowPoints = pc.myHUD.default.bShowPoints;
|
||||||
|
|
||||||
|
// Reset the view-smoothing
|
||||||
|
NextHistorySlot = 0;
|
||||||
|
bHistoryWarmup = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Called from the PlayerController when player wants to get out.
|
||||||
|
function bool KDriverLeave(bool bForceLeave)
|
||||||
|
{
|
||||||
|
local PlayerController pc;
|
||||||
|
local int i;
|
||||||
|
local bool havePlaced;
|
||||||
|
local vector HitLocation, HitNormal, tryPlace;
|
||||||
|
|
||||||
|
// Do nothing if we're not being driven
|
||||||
|
if(Driver == None)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Before we can exit, we need to find a place to put the driver.
|
||||||
|
// Iterate over array of possible exit locations.
|
||||||
|
|
||||||
|
if (!bRemoteControlled)
|
||||||
|
{
|
||||||
|
|
||||||
|
Driver.bCollideWorld = true;
|
||||||
|
Driver.SetCollision(true, true);
|
||||||
|
|
||||||
|
havePlaced = false;
|
||||||
|
for(i=0; i < ExitPositions.Length && havePlaced == false; i++)
|
||||||
|
{
|
||||||
|
//Log("Trying Exit:"$i);
|
||||||
|
|
||||||
|
tryPlace = Location + (ExitPositions[i] >> Rotation);
|
||||||
|
|
||||||
|
// First, do a line check (stops us passing through things on exit).
|
||||||
|
if( Trace(HitLocation, HitNormal, tryPlace, Location, false) != None )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Then see if we can place the player there.
|
||||||
|
if( !Driver.SetLocation(tryPlace) )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
havePlaced = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If we could not find a place to put the driver, leave driver inside as before.
|
||||||
|
if(!havePlaced && !bForceLeave)
|
||||||
|
{
|
||||||
|
Log("Could not place driver.");
|
||||||
|
|
||||||
|
Driver.bCollideWorld = false;
|
||||||
|
Driver.SetCollision(false, false);
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pc = PlayerController(Controller);
|
||||||
|
ClientKDriverLeave(pc);
|
||||||
|
|
||||||
|
// Reconnect PlayerController to Driver.
|
||||||
|
pc.Unpossess();
|
||||||
|
pc.Possess(Driver);
|
||||||
|
|
||||||
|
pc.ClientSetViewTarget(Driver); // Set playercontroller to view the persone that got out
|
||||||
|
|
||||||
|
Controller = None;
|
||||||
|
|
||||||
|
Driver.PlayWaiting();
|
||||||
|
Driver.bPhysicsAnimUpdate = Driver.Default.bPhysicsAnimUpdate;
|
||||||
|
|
||||||
|
// Do stuff on client
|
||||||
|
//pc.ClientSetBehindView(false);
|
||||||
|
//pc.ClientSetFixedCamera(true);
|
||||||
|
|
||||||
|
if (!bRemoteControlled)
|
||||||
|
{
|
||||||
|
|
||||||
|
Driver.Acceleration = vect(0, 0, 24000);
|
||||||
|
Driver.SetPhysics(PHYS_Falling);
|
||||||
|
Driver.SetBase(None);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Car now has no driver
|
||||||
|
Driver = None;
|
||||||
|
|
||||||
|
// Put brakes on before you get out :)
|
||||||
|
Throttle=0;
|
||||||
|
Steering=0;
|
||||||
|
|
||||||
|
// Stop firing when you get out!
|
||||||
|
bVehicleIsFiring = false;
|
||||||
|
bVehicleIsAltFiring = false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Special calc-view for vehicles
|
||||||
|
simulated function bool SpecialCalcView(out actor ViewActor, out vector CameraLocation, out rotator CameraRotation )
|
||||||
|
{
|
||||||
|
local vector CamLookAt, HitLocation, HitNormal;
|
||||||
|
local PlayerController pc;
|
||||||
|
local int i, averageOver;
|
||||||
|
|
||||||
|
pc = PlayerController(Controller);
|
||||||
|
|
||||||
|
// Only do this mode we have a playercontroller viewing this vehicle
|
||||||
|
if(pc == None || pc.ViewTarget != self)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
ViewActor = self;
|
||||||
|
CamLookAt = Location + (vect(-100, 0, 100) >> Rotation);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
// Smooth lookat position over a few frames.
|
||||||
|
CameraHistory[NextHistorySlot] = CamLookAt;
|
||||||
|
NextHistorySlot++;
|
||||||
|
|
||||||
|
if(bHistoryWarmup)
|
||||||
|
averageOver = NextHistorySlot;
|
||||||
|
else
|
||||||
|
averageOver = FilterFrames;
|
||||||
|
|
||||||
|
CamLookAt = vect(0, 0, 0);
|
||||||
|
for(i=0; i<averageOver; i++)
|
||||||
|
CamLookAt += CameraHistory[i];
|
||||||
|
|
||||||
|
CamLookAt /= float(averageOver);
|
||||||
|
|
||||||
|
if(NextHistorySlot == FilterFrames)
|
||||||
|
{
|
||||||
|
NextHistorySlot = 0;
|
||||||
|
bHistoryWarmup=false;
|
||||||
|
}
|
||||||
|
//////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
CameraLocation = CamLookAt + (vect(-600, 0, 0) >> CameraRotation);
|
||||||
|
|
||||||
|
if( Trace( HitLocation, HitNormal, CameraLocation, CamLookAt, false, vect(10, 10, 10) ) != None )
|
||||||
|
{
|
||||||
|
CameraLocation = HitLocation;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
simulated function Destroyed()
|
||||||
|
{
|
||||||
|
// If there was a driver in the vehicle, destroy him too
|
||||||
|
if ( Driver != None )
|
||||||
|
Driver.Destroy();
|
||||||
|
|
||||||
|
// Trigger any effects for destruction
|
||||||
|
if(DestroyEffectClass != None)
|
||||||
|
spawn(DestroyEffectClass, , , Location, Rotation);
|
||||||
|
|
||||||
|
Super.Destroyed();
|
||||||
|
}
|
||||||
|
|
||||||
|
simulated event Tick(float deltaSeconds)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Includes properties from KActor
|
||||||
|
defaultproperties
|
||||||
|
{
|
||||||
|
Steering=0
|
||||||
|
Throttle=0
|
||||||
|
|
||||||
|
ExitPositions(0)=(X=0,Y=0,Z=0)
|
||||||
|
|
||||||
|
DrivePos=(X=0,Y=0,Z=0)
|
||||||
|
DriveRot=()
|
||||||
|
|
||||||
|
bHistoryWarmup = true;
|
||||||
|
|
||||||
|
|
||||||
|
Physics=PHYS_Karma
|
||||||
|
bEdShouldSnap=True
|
||||||
|
bStatic=False
|
||||||
|
bShadowCast=False
|
||||||
|
bCollideActors=True
|
||||||
|
bCollideWorld=False
|
||||||
|
bProjTarget=True
|
||||||
|
bBlockActors=True
|
||||||
|
bBlockNonZeroExtentTraces=True
|
||||||
|
bBlockZeroExtentTraces=True
|
||||||
|
bWorldGeometry=False
|
||||||
|
bBlockKarma=True
|
||||||
|
bAcceptsProjectors=True
|
||||||
|
bCanBeBaseForPawns=True
|
||||||
|
bAlwaysRelevant=True
|
||||||
|
RemoteRole=ROLE_SimulatedProxy
|
||||||
|
bNetInitialRotation=True
|
||||||
|
bSpecialCalcView=True
|
||||||
|
//bSpecialHUD=true
|
||||||
|
}
|
||||||
135
rottlib/tests/fixtures_tokenization.rs
Normal file
135
rottlib/tests/fixtures_tokenization.rs
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
use std::{fs, path::PathBuf};
|
||||||
|
|
||||||
|
use rottlib::lexer::{Keyword, Token, TokenizedFile};
|
||||||
|
|
||||||
|
/// Returns the path to a fixture file in `tests/fixtures/`.
|
||||||
|
fn fixture_file_path(name: &str) -> PathBuf {
|
||||||
|
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||||
|
.join("tests")
|
||||||
|
.join("fixtures")
|
||||||
|
.join(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Loads a fixture source file as UTF-8 text.
|
||||||
|
fn read_fixture_source(name: &str) -> String {
|
||||||
|
fs::read_to_string(fixture_file_path(name))
|
||||||
|
.unwrap_or_else(|e| panic!("failed to read fixture {name}: {e}"))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the token at the given token index on a physical line.
|
||||||
|
///
|
||||||
|
/// Here `line` is 1-based, to match human line numbers in fixture files.
|
||||||
|
/// `token_index` is 0-based within `TokenizedFile::line_tokens`.
|
||||||
|
fn token_on_line(file: &TokenizedFile<'_>, line: usize, token_index: usize) -> Option<Token> {
|
||||||
|
file.line_tokens(line - 1)
|
||||||
|
.nth(token_index)
|
||||||
|
.map(|(_, token_data)| token_data.token)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns reconstructed visible text for a physical line.
|
||||||
|
///
|
||||||
|
/// Here `line` is 1-based, to match human line numbers in fixture files.
|
||||||
|
fn line_text(file: &TokenizedFile<'_>, line: usize) -> Option<String> {
|
||||||
|
file.line_text(line - 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn command_api_fixture_queries() {
|
||||||
|
let source = read_fixture_source("CommandAPI.uc");
|
||||||
|
let file = TokenizedFile::tokenize(&source);
|
||||||
|
assert_eq!(file.line_count(), 1578);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 704).as_deref(),
|
||||||
|
Some(
|
||||||
|
"public final function CommandConfigInfo ResolveCommandForUserID(BaseText itemName, UserID id) {"
|
||||||
|
)
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 806).as_deref(),
|
||||||
|
Some(" _.memory.Free(wrapper);")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 1274).as_deref(),
|
||||||
|
Some("/// Method must be called after [`Voting`] with a given name is added.")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 14).as_deref(),
|
||||||
|
Some(" * Acedia is distributed in the hope that it will be useful,")
|
||||||
|
);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 22, 0).unwrap();
|
||||||
|
assert_eq!(token, Token::Keyword(Keyword::Class));
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 1577, 0).unwrap();
|
||||||
|
assert_eq!(token, Token::Keyword(Keyword::DefaultProperties));
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 649, 4).unwrap();
|
||||||
|
assert_eq!(token, Token::Whitespace);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn dbrecord_fixture_queries() {
|
||||||
|
let source = read_fixture_source("DBRecord.uc");
|
||||||
|
let file = TokenizedFile::tokenize(&source);
|
||||||
|
assert_eq!(file.line_count(), 1199);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 149).as_deref(),
|
||||||
|
Some(" * However, JSON pointers are not convenient or efficient enough for that,")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 787).as_deref(),
|
||||||
|
Some(" * 3. 'number' -> either `IntBox` or `FloatBox`, depending on")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 1023).as_deref(),
|
||||||
|
Some(" bool makeMutable)")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 29).as_deref(),
|
||||||
|
Some(" config(AcediaDB);")
|
||||||
|
);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 565, 0).unwrap();
|
||||||
|
assert_eq!(token, Token::BlockComment);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 467, 10).unwrap();
|
||||||
|
assert_eq!(token, Token::Identifier);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 467, 9).unwrap();
|
||||||
|
assert_eq!(token, Token::LeftParenthesis);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn kvehicle_fixture_queries() {
|
||||||
|
let source = read_fixture_source("KVehicle.uc");
|
||||||
|
let file = TokenizedFile::tokenize(&source);
|
||||||
|
assert_eq!(file.line_count(), 326);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 12).as_deref(),
|
||||||
|
Some(" virtual void setPhysics(BYTE NewPhysics, AActor *NewFloor, FVector NewFloorV);")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 127).as_deref(),
|
||||||
|
Some(" pc.myHUD.bCrosshairShow = pc.myHUD.default.bCrosshairShow;")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 264).as_deref(),
|
||||||
|
Some(" //////////////////////////////////////////////////////")
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
line_text(&file, 299).as_deref(),
|
||||||
|
Some(" ExitPositions(0)=(X=0,Y=0,Z=0)")
|
||||||
|
);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 17, 0).unwrap();
|
||||||
|
assert_eq!(token, Token::Newline);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 20, 7).unwrap();
|
||||||
|
assert_eq!(token, Token::Less);
|
||||||
|
|
||||||
|
let token = token_on_line(&file, 246, 2).unwrap();
|
||||||
|
assert_eq!(token, Token::Increment);
|
||||||
|
}
|
||||||
@ -37,7 +37,7 @@ impl tower_lsp::LanguageServer for RottLanguageServer {
|
|||||||
// Measure lexing performance to track parser responsiveness.
|
// Measure lexing performance to track parser responsiveness.
|
||||||
let start_time = std::time::Instant::now();
|
let start_time = std::time::Instant::now();
|
||||||
let has_errors =
|
let has_errors =
|
||||||
rottlib::lexer::TokenizedFile::from_str(¶ms.text_document.text).has_errors();
|
rottlib::lexer::TokenizedFile::tokenize(¶ms.text_document.text).has_errors();
|
||||||
let elapsed_time = start_time.elapsed();
|
let elapsed_time = start_time.elapsed();
|
||||||
|
|
||||||
self.client
|
self.client
|
||||||
|
|||||||
97
test.uc
Normal file
97
test.uc
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
/// BOF line comment
|
||||||
|
/* BOF block comment */
|
||||||
|
|
||||||
|
class TestClass extends Actor
|
||||||
|
abstract
|
||||||
|
native;
|
||||||
|
//nativereplication;
|
||||||
|
|
||||||
|
/* One blank line follows to test has_blank_line_after() */
|
||||||
|
|
||||||
|
function int fuck_you(int a, float b, string c)
|
||||||
|
{
|
||||||
|
// ---- locals with an error to trigger recovery to comma/semicolon
|
||||||
|
local int i, /* oops */ , k;
|
||||||
|
local int a, b, c;
|
||||||
|
|
||||||
|
// ---- builtins: valid + error + various initializers
|
||||||
|
int a = 1, b, , c = 3;
|
||||||
|
float f = (1.0 + 2.0) * 0.5;
|
||||||
|
bool flag = true;
|
||||||
|
string s = "hi\n\"ok\"";
|
||||||
|
name tag;
|
||||||
|
array nums;
|
||||||
|
|
||||||
|
// ---- label + goto
|
||||||
|
start:
|
||||||
|
goto start2;
|
||||||
|
|
||||||
|
// ---- if / else with tail-as-value and missing semicolons inside
|
||||||
|
if (a + c > 0) {
|
||||||
|
while (a < 5) {
|
||||||
|
if (flag) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
a + 1; // ok
|
||||||
|
continue
|
||||||
|
} // missing ';' before '}' should be fine (SelfTerminating)
|
||||||
|
} else {
|
||||||
|
{
|
||||||
|
a + 2;
|
||||||
|
b // tail expression (no ';') becomes block tail
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- for with header pieces using statement-as-value
|
||||||
|
for (i; i < 10; i += 1) {
|
||||||
|
j + i;
|
||||||
|
i + j // no semicolon, next is '}' so this is a tail
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- assert with a block-as-value (statement-as-value)
|
||||||
|
assert {
|
||||||
|
i = i + 1;
|
||||||
|
i // tail is the value of the block
|
||||||
|
};
|
||||||
|
|
||||||
|
// ---- foreach (paren and no-paren forms)
|
||||||
|
foreach (nums) {
|
||||||
|
i++
|
||||||
|
}
|
||||||
|
foreach nums {
|
||||||
|
--i; // prefix and postfix in play
|
||||||
|
j--
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- do ... until (paren and no-paren) + semicolon handling
|
||||||
|
do {
|
||||||
|
i = i + 1
|
||||||
|
} until (i > 3);
|
||||||
|
do i = i + 1; until i > 5;
|
||||||
|
|
||||||
|
// ---- switch with multi-label case, recovery, and default
|
||||||
|
switch (a + c) {
|
||||||
|
case 0:
|
||||||
|
case 1:
|
||||||
|
a = a + 10
|
||||||
|
// missing ';' here forces recovery to next boundary (case/default/})
|
||||||
|
case 2:
|
||||||
|
assert (a > 0); // regular statement
|
||||||
|
break;
|
||||||
|
case 3, 4:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
// some stray token sequence to poke "unexpected token in switch body"
|
||||||
|
/* block comment with
|
||||||
|
newlines */
|
||||||
|
a + ; // malformed expr; recover to boundary
|
||||||
|
continue; // legal statement after recovery
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- second label target for goto
|
||||||
|
start2:
|
||||||
|
return a; // final return
|
||||||
|
}
|
||||||
|
|
||||||
|
// EOF trailing line comment
|
||||||
|
/* EOF trailing block comment */
|
||||||
10
test_full.uc
Normal file
10
test_full.uc
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
// #[config(JustConfig)]
|
||||||
|
abstract class NewWay extends AcediaObject {
|
||||||
|
// #[config(MaxWavesAmount)]
|
||||||
|
var int _value;
|
||||||
|
}
|
||||||
|
|
||||||
|
class TestClass extends Actor
|
||||||
|
abstract
|
||||||
|
native
|
||||||
|
nativereplication;
|
||||||
Loading…
Reference in New Issue
Block a user