From 4d0ce44f7f134d2e8db1a54a95827cd4b3161b6b Mon Sep 17 00:00:00 2001 From: maxrdz Date: Mon, 4 Mar 2024 16:07:03 -0700 Subject: [PATCH] dcparser: Begin work on propogating data up the parse tree --- libdonet/src/dcparser.rs | 166 ++++++++++++++++++++++++++++----------- 1 file changed, 120 insertions(+), 46 deletions(-) diff --git a/libdonet/src/dcparser.rs b/libdonet/src/dcparser.rs index 3986bb8..027521e 100644 --- a/libdonet/src/dcparser.rs +++ b/libdonet/src/dcparser.rs @@ -18,10 +18,9 @@ //! Definition of the DC language context free grammar for the //! LALR(1) parser processing the stream of lexical tokens. -/* The following suppress linting warnings, which are okay to ignore - * as they go off in the parser grammar definitions, which we are writing - * just as the plex crate readme says we should, so everything is okay. - */ +// The following suppress linting warnings, which are okay to ignore +// as they go off in the parser grammar definitions, which we are writing +// just as the plex crate readme says we should, so everything is okay. #![allow( clippy::type_complexity, clippy::redundant_field_names, @@ -31,33 +30,40 @@ clippy::let_unit_value )] -use crate::dcfile::{DCFile, DCFileInterface, DCImport}; -use crate::dckeyword; -use crate::dclass; +use crate::dcarray::*; +use crate::dcatomic::*; +use crate::dcfield::*; +use crate::dcfile::*; +use crate::dckeyword; // Avoid wildcard import due to conflict with DCToken variant. +use crate::dclass; // Same reason as comment above. use crate::dclexer::DCToken::*; use crate::dclexer::{DCToken, Span}; +use crate::dcmolecular::*; +use crate::dcnumeric::*; +use crate::dcparameter::*; use crate::dcstruct; +use crate::dctype::*; use plex::parser; +use std::mem::discriminant; use std::sync::{Arc, Mutex}; -/* To write the DC file elements to memory just as Panda and Astron do, I - * initially stored the DCFile struct on static memory as mutable. 
This required - * the unsafe block { } whenever it was accessed or modified, but it did not cause - * undefined behavior ... until unit testing. Of course, it is ideal to not use - * unsafe techniques from the beginning, so I decided to make use of Plex's features - * by assigning types to the grammar's non-terminals and propogating the elements - * bottom-up. (as LALR(1) parsers are 'bottom-up' parsers, where they start by - * producing the 'edge' productions, until the parser reduces all non-terminals - * to the root production of the language grammar.) - * - * Since we are propogating elements from the bottom of the parse tree and upwards, - * the return types of non-terminals closer to the root production get bigger and bigger, - * as they're carrying more and more of the total elements in the DC file until they are - * all 'plugged in together' into the DC file struct once we reduce to the root production. - * - * Even though it may *appear* visually ugly, this is the safest, and cleanest, - * approach to assembling the DC file structure in memory using Plex. - */ +// To write the DC file elements to memory just as Panda and Astron do, I +// initially stored the DCFile struct on static memory as mutable. This required +// the unsafe block { } whenever it was accessed or modified, but it did not cause +// undefined behavior ... until unit testing. Of course, it is ideal to not use +// unsafe techniques from the beginning, so I decided to make use of Plex's features +// by assigning types to the grammar's non-terminals and propagating the elements +// bottom-up. (as LALR(1) parsers are 'bottom-up' parsers, where they start by +// producing the 'edge', or 'leaf', productions, until the parser reduces all +// non-terminals to the root production of the language grammar.) 
+// +// Since we are propagating elements from the bottom of the parse tree and upwards, +// the return types of non-terminals closer to the root production get bigger and bigger, +// as they're carrying more and more of the total elements in the DC file until they are +// all 'plugged in together' into the DC file struct once we reduce to the root production. +// +// Even though it may *appear* visually ugly, this is the safest, and cleanest, +// approach to assembling the DC file structure in memory. enum TypeDeclaration { PythonImport(Vec), @@ -65,7 +71,22 @@ enum TypeDeclaration { StructType(dcstruct::DCStruct), SwitchType(Option), DClassType(dclass::DClass), - TypedefType(Option), + TypedefType(DCTypeDefinition), +} + +/// Paired with the `char_or_u16` production. +#[derive(Clone, Copy)] +enum CharOrU16 { + Char(char), + U16(u16), +} + +/// Paired with the `char_or_number` production. +#[derive(Clone, Copy)] +enum CharOrNumber { + Char(char), + I64(i64), + F64(f64), } parser! { @@ -134,7 +155,7 @@ parser! { struct_type[strct] => TypeDeclaration::StructType(strct), switch_type => TypeDeclaration::SwitchType(None), distributed_class_type[dclass] => TypeDeclaration::DClassType(dclass), - type_definition => TypeDeclaration::TypedefType(None), + type_definition => TypeDeclaration::TypedefType(DCTypeDefinition::new()), } // ---------- Python-style Imports ---------- // @@ -613,16 +634,65 @@ parser! 
{ numeric_type_token[_] OpenParenthesis floating_point_type[_] CloseParenthesis => {}, } - numeric_range: () { - epsilon => {}, - char_or_number => {}, - char_or_number Hyphen char_or_number => {}, + numeric_range: Option { + epsilon => None, + + char_or_number[v] => match v { + CharOrNumber::Char(c) => { + let min_max: u64 = u64::from(c); + Some(DCNumericRange::new_unsigned_integer_range(min_max, min_max)) + }, + CharOrNumber::I64(i) => Some(DCNumericRange::new_integer_range(i, i)), + CharOrNumber::F64(f) => Some(DCNumericRange::new_floating_point_range(f, f)), + }, + + char_or_number[min] Hyphen char_or_number[max] => { + assert!( + discriminant(&min) == discriminant(&max), + "Cannot define a numeric range with a min and max of different data types!", + ); + + match min { + CharOrNumber::Char(min_c) => { + let min_u64: u64 = u64::from(min_c); + let max_u64: u64 = match max { + CharOrNumber::Char(max_c) => u64::from(max_c), + _ => panic!("This isn't possible."), + }; + Some(DCNumericRange::new_unsigned_integer_range(min_u64, max_u64)) + }, + CharOrNumber::I64(min_i) => Some(DCNumericRange::new_integer_range(min_i, match max { + CharOrNumber::I64(max_i) => max_i, + _ => panic!("This isn't possible."), + })), + CharOrNumber::F64(min_f) => Some(DCNumericRange::new_floating_point_range(min_f, match max { + CharOrNumber::F64(max_f) => max_f, + _ => panic!("This isn't possible."), + })), + } + }, } - array_range: () { - epsilon => {}, - char_or_u16 => {}, - char_or_u16 Hyphen char_or_u16 => {}, + array_range: Option { + epsilon => None, + char_or_u16[v] => match v { + CharOrU16::Char(c) => Some(DCNumericRange::new_unsigned_integer_range(u64::from(c), u64::from(c))), + CharOrU16::U16(u) => Some(DCNumericRange::new_unsigned_integer_range(u64::from(u), u64::from(u))), + }, + char_or_u16[min] Hyphen char_or_u16[max] => { + let min_uint: u64; + let max_uint: u64; + + match min { + CharOrU16::Char(c) => min_uint = u64::from(c), + CharOrU16::U16(u) => min_uint = u64::from(u), 
+ } + match max { + CharOrU16::Char(c) => max_uint = u64::from(c), + CharOrU16::U16(u) => max_uint = u64::from(u), + } + Some(DCNumericRange::new_unsigned_integer_range(min_uint, max_uint)) + }, } // Both of these types represent a sized type (aka, array type) @@ -641,10 +711,15 @@ parser! { floating_point_type[tok] => tok, } - char_or_number: () { - CharT => {}, - signed_integer[_] => {}, - number[_] => {}, + char_or_number: CharOrNumber { + CharacterLiteral(c) => CharOrNumber::Char(c), + signed_integer[v] => CharOrNumber::I64(v), + + number[tok] => match tok { + DecimalLiteral(dl) => CharOrNumber::I64(dl), + FloatLiteral(fl) => CharOrNumber::F64(fl), + _ => panic!("'number' non-terminal returned an unexpected DC token!"), + }, } signed_integer: i64 { @@ -657,15 +732,14 @@ parser! { FloatLiteral(fl) => FloatLiteral(fl), } - char_or_u16: () { - CharT => {}, - unsigned_16_bit_int[_] => {}, + char_or_u16: CharOrU16 { + CharacterLiteral(cl) => CharOrU16::Char(cl), + unsigned_16_bit_int[u] => CharOrU16::U16(u), } - /* In Panda's parser, this production is known as 'small_unsigned_integer'. - * C++ standard for an 'unsigned int' size is at least 16 bits. - * 16 bits for LP32 data model; ILP32, LLP64, & LP64 are 32 bits. - */ + // In Panda's parser, this production is known as 'small_unsigned_integer'. + // C++ standard for an 'unsigned int' size is at least 16 bits. + // 16 bits for LP32 data model; ILP32, LLP64, & LP64 are 32 bits. unsigned_16_bit_int: u16 { DecimalLiteral(v) => { match u16::try_from(v) {