From ba6ccb5df5a053549033a16fdfb391f6b3f36f2a Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 13 Dec 2024 15:51:18 +0100 Subject: [PATCH 1/8] add string type --- src/codegen/check.rs | 6 +++- src/codegen/mod.rs | 60 +++++++++++++++++++++++++++++++++++++++- src/codegen/tests.rs | 28 +++++++++++++++++-- src/lower/eval.rs | 5 ++++ src/lower/ir.rs | 16 ++++++++++- src/lower/mod.rs | 29 ++++++++++++++++--- src/parser/expr.rs | 1 + src/runtime/mod.rs | 24 ++++++++++++++++ src/runtime/ty.rs | 14 +++++++++- src/typechecker/info.rs | 5 +++- src/typechecker/types.rs | 3 +- 11 files changed, 179 insertions(+), 12 deletions(-) diff --git a/src/codegen/check.rs b/src/codegen/check.rs index 75599321..3cf38bf6 100644 --- a/src/codegen/check.rs +++ b/src/codegen/check.rs @@ -9,7 +9,9 @@ use crate::{ types::{Primitive, Type}, }, }; -use std::{any::TypeId, fmt::Display, mem::MaybeUninit, net::IpAddr}; +use std::{ + any::TypeId, fmt::Display, mem::MaybeUninit, net::IpAddr, sync::Arc, +}; #[derive(Debug)] pub enum FunctionRetrievalError { @@ -86,6 +88,7 @@ fn check_roto_type( let ASN: TypeId = TypeId::of::(); let IPADDR: TypeId = TypeId::of::(); let PREFIX: TypeId = TypeId::of::(); + let STRING: TypeId = TypeId::of::>(); let Some(rust_ty) = registry.get(rust_ty) else { return Err(TypeMismatch { @@ -121,6 +124,7 @@ fn check_roto_type( x if x == ASN => Type::Primitive(Primitive::Asn), x if x == IPADDR => Type::Primitive(Primitive::IpAddr), x if x == PREFIX => Type::Primitive(Primitive::Prefix), + x if x == STRING => Type::Primitive(Primitive::String), _ => panic!(), }; if expected_roto == roto_ty { diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 09577996..47648f1f 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -211,6 +211,9 @@ struct ModuleBuilder { /// Signature to use for calls to `drop` drop_signature: Signature, + /// Signature to use for calls to `init_string` + init_string_signature: Signature, + context_description: ContextDescription, } @@ -231,6 
+234,9 @@ struct FuncGen<'c> { /// Signature to use for calls to `drop` drop_signature: SigRef, + + /// Signature to use for calls to `init_string` + init_string_signature: SigRef, } // We use `with_aligned` to make sure that we notice if anything is @@ -279,6 +285,17 @@ pub fn codegen( .params .push(AbiParam::new(isa.pointer_type())); + let mut init_string_signature = jit.make_signature(); + init_string_signature + .params + .push(AbiParam::new(isa.pointer_type())); + init_string_signature + .params + .push(AbiParam::new(isa.pointer_type())); + init_string_signature + .params + .push(AbiParam::new(cranelift::codegen::ir::types::I32)); + let mut module = ModuleBuilder { functions: HashMap::new(), runtime_functions: HashMap::new(), @@ -289,6 +306,7 @@ pub fn codegen( type_info, drop_signature, clone_signature, + init_string_signature, context_description, }; @@ -441,6 +459,8 @@ impl ModuleBuilder { .import_signature(self.drop_signature.clone()), clone_signature: builder .import_signature(self.clone_signature.clone()), + init_string_signature: builder + .import_signature(self.init_string_signature.clone()), module: self, builder, scope: *scope, @@ -555,7 +575,7 @@ impl<'c> FuncGen<'c> { args.next().unwrap(), ); - if dbg!(return_ptr) { + if return_ptr { self.def( self.module.variable_map[&Var { scope: self.scope, @@ -927,6 +947,44 @@ impl<'c> FuncGen<'c> { let to = self.variable(to, ty); self.def(to, val); } + ir::Instruction::InitString { + to, + string, + init_func, + } => { + let data_id = self + .module + .inner + .declare_anonymous_data(false, false) + .unwrap(); + + let mut description = DataDescription::new(); + description.define(string.clone().into_bytes().into()); + self.module + .inner + .define_data(data_id, &description) + .unwrap(); + + let global_value = self + .module + .inner + .declare_data_in_func(data_id, self.builder.func); + + let pointer_ty = self.module.isa.pointer_type(); + let init_func = self.ins().iconst( + pointer_ty, + *init_func as *mut 
u8 as usize as i64, + ); + let data = self.ins().global_value(pointer_ty, global_value); + let len = self.ins().iconst(I32, string.len() as u64 as i64); + + let (to, _) = self.operand(&Operand::Place(to.clone())); + self.builder.ins().call_indirect( + self.init_string_signature, + init_func, + &[to, data, len], + ); + } } } diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index 76d16df5..8af01a18 100644 --- a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -1,4 +1,7 @@ -use std::{net::IpAddr, sync::atomic::AtomicUsize}; +use std::{ + net::IpAddr, + sync::{atomic::AtomicUsize, Arc}, +}; use inetnum::{addr::Prefix, asn::Asn}; use roto_macros::{roto_function, roto_static_method}; @@ -55,7 +58,6 @@ fn accept() { .expect("No function found (or mismatched types)"); let res = f.call(&mut ()); - dbg!(std::mem::size_of::>()); assert_eq!(res, Verdict::Accept(())); } @@ -969,3 +971,25 @@ fn use_context() { let output = f.call(&mut ctx); assert_eq!(output, Verdict::Accept(11)); } + +#[test] +fn string() { + let s = src!( + r#" + filter-map main() { + apply { + accept "hello" + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (), Verdict, ()>>("main") + .unwrap(); + + let res = f.call(&mut ()); + assert_eq!(res, Verdict::Accept("hello".into())); +} diff --git a/src/lower/eval.rs b/src/lower/eval.rs index c825a00d..da95836a 100644 --- a/src/lower/eval.rs +++ b/src/lower/eval.rs @@ -633,6 +633,11 @@ pub fn eval( }; vars.insert(to.clone(), IrValue::Pointer(res)); } + Instruction::InitString { + to: _, + string: _, + init_func: _, + } => todo!(), } program_counter += 1; diff --git a/src/lower/ir.rs b/src/lower/ir.rs index 04f6921c..7e82baf0 100644 --- a/src/lower/ir.rs +++ b/src/lower/ir.rs @@ -20,7 +20,7 @@ //! //! 
[cranelift]: https://docs.rs/cranelift-frontend/latest/cranelift_frontend/ -use std::fmt::Display; +use std::{fmt::Display, sync::Arc}; use crate::{ ast::Identifier, @@ -90,6 +90,13 @@ pub enum Instruction { ty: IrType, }, + /// Create string + InitString { + to: Var, + string: String, + init_func: unsafe extern "C" fn(*mut Arc, *mut u8, u32), + }, + /// Call a function. Call { to: Option<(Var, IrType)>, @@ -414,6 +421,13 @@ impl<'a> IrPrinter<'a> { .collect::>() .join(", ") ), + InitString { + to, + string, + init_func: _, + } => { + format!("{}: String = \"{string}\"", self.var(to),) + } Return(None) => "return".to_string(), Return(Some(v)) => { format!("return {}", self.operand(v)) diff --git a/src/lower/mod.rs b/src/lower/mod.rs index d54ce986..14ad8ee8 100644 --- a/src/lower/mod.rs +++ b/src/lower/mod.rs @@ -260,8 +260,6 @@ impl<'r> Lowerer<'r> { x => (Some(self.lower_type(&x)), false), }; - dbg!(return_ptr); - let ir_signature = ir::Signature { parameters: parameter_types .iter() @@ -1056,7 +1054,27 @@ impl<'r> Lowerer<'r> { /// Lower a literal fn literal(&mut self, lit: &Meta) -> Operand { match &lit.node { - Literal::String(_) => todo!(), + Literal::String(s) => { + let size = std::mem::size_of::() as u32; + let align = std::mem::align_of::(); + let align_shift = align.ilog2() as u8; + + let to = self.new_tmp(); + + self.add(Instruction::Alloc { + to: to.clone(), + size, + align_shift, + }); + + self.add(Instruction::InitString { + to: to.clone(), + string: s.clone(), + init_func: self.runtime.string_init_function, + }); + + to.into() + } Literal::Asn(n) => IrValue::Asn(*n).into(), Literal::IpAddress(addr) => { let to = self.new_tmp(); @@ -1174,7 +1192,9 @@ impl<'r> Lowerer<'r> { | Type::NamedRecord(..) | Type::Enum(..) | Type::Verdict(..) - | Type::Primitive(Primitive::IpAddr | Primitive::Prefix) + | Type::Primitive( + Primitive::IpAddr | Primitive::Prefix | Primitive::String + ) | Type::BuiltIn(..) 
) } @@ -1193,6 +1213,7 @@ impl<'r> Lowerer<'r> { Type::Primitive(Primitive::I64) => IrType::I64, Type::Primitive(Primitive::Asn) => IrType::Asn, Type::Primitive(Primitive::IpAddr) => IrType::Pointer, + Type::Primitive(Primitive::String) => IrType::Pointer, Type::IntVar(_) => IrType::I32, Type::BuiltIn(_, _) => IrType::ExtPointer, x if self.is_reference_type(&x) => IrType::Pointer, diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 2ab9ce64..241a13ee 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -392,6 +392,7 @@ impl Parser<'_, '_> { | Token::IpV4(_) | Token::IpV6(_) | Token::Asn(_) + | Token::String(_) ) } diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 49591603..0d8ece59 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -33,10 +33,13 @@ pub mod ty; pub mod val; pub mod verdict; +use core::{slice, str}; use std::{ any::{type_name, TypeId}, collections::HashMap, net::{IpAddr, Ipv4Addr, Ipv6Addr}, + ptr, + sync::Arc, }; use context::ContextDescription; @@ -59,6 +62,8 @@ pub struct Runtime { pub functions: Vec, pub constants: HashMap, pub type_registry: TypeRegistry, + pub string_init_function: + unsafe extern "C" fn(*mut Arc, *mut u8, u32), } #[derive(Debug)] @@ -86,6 +91,12 @@ unsafe extern "C" fn extern_drop(x: *mut ()) { std::ptr::read(x); } +unsafe extern "C" fn init_string(s: *mut Arc, data: *mut u8, len: u32) { + let slice = unsafe { slice::from_raw_parts(data, len as usize) }; + let str = unsafe { str::from_utf8_unchecked(slice) }; + unsafe { ptr::write(s, str.into()) }; +} + #[derive(Debug)] pub struct RuntimeType { /// The name the type can be referenced by from Roto @@ -632,6 +643,7 @@ impl Runtime { functions: Default::default(), type_registry: Default::default(), constants: Default::default(), + string_init_function: init_string as _, }; rt.register_copy_type_with_name::<()>( @@ -639,12 +651,15 @@ impl Runtime { "The unit type that has just one possible value. 
It can be used \ when there is nothing meaningful to be returned.", )?; + rt.register_copy_type::( "The boolean type\n\n\ This type has two possible values: `true` and `false`. Several \ boolean operations can be used with booleans, such as `&&` (\ logical and), `||` (logical or) and `not`.", )?; + + // All the integer types rt.register_copy_type::(int_docs!(u8))?; rt.register_copy_type::(int_docs!(u16))?; rt.register_copy_type::(int_docs!(u32))?; @@ -653,6 +668,7 @@ impl Runtime { rt.register_copy_type::(int_docs!(i16))?; rt.register_copy_type::(int_docs!(i32))?; rt.register_copy_type::(int_docs!(i64))?; + rt.register_copy_type::( "An ASN: an Autonomous System Number\n\ \n\ @@ -666,6 +682,7 @@ impl Runtime { AS4294967295\n\ ```\n\ ")?; + rt.register_copy_type::( "An IP address\n\nCan be either IPv4 or IPv6.\n\ \n\ @@ -682,6 +699,7 @@ impl Runtime { ```\n\ ", )?; + rt.register_copy_type::( "An IP address prefix: the combination of an IP address and a prefix length\n\n\ A prefix can be constructed with the `/` operator or with the \ @@ -695,6 +713,11 @@ impl Runtime { ", )?; + rt.register_clone_type_with_name::>( + "String", + "The string type", + )?; + /// Construct a new prefix /// /// A prefix can also be constructed with the `/` operator. @@ -854,6 +877,7 @@ pub mod tests { "Asn", "IpAddr", "Prefix", + "String", "OriginType", "LocalPref", "Community", diff --git a/src/runtime/ty.rs b/src/runtime/ty.rs index 218ad3c3..89f7120f 100644 --- a/src/runtime/ty.rs +++ b/src/runtime/ty.rs @@ -12,7 +12,7 @@ use std::{ any::{type_name, TypeId}, collections::HashMap, net::IpAddr, - sync::{LazyLock, Mutex}, + sync::{Arc, LazyLock, Mutex}, }; use inetnum::{addr::Prefix, asn::Asn}; @@ -239,6 +239,18 @@ impl Reflect for Prefix { } } +impl Reflect for Arc { + type AsParam = *mut Self; + + fn as_param(&mut self) -> Self::AsParam { + self as _ + } + + fn resolve(registry: &mut TypeRegistry) -> Ty { + registry.store::(TypeDescription::Leaf) + } +} + macro_rules! 
simple_reflect { ($t:ty) => { impl Reflect for $t { diff --git a/src/typechecker/info.rs b/src/typechecker/info.rs index 3e26836f..33242a73 100644 --- a/src/typechecker/info.rs +++ b/src/typechecker/info.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, net::IpAddr}; +use std::{collections::HashMap, net::IpAddr, sync::Arc}; use inetnum::addr::Prefix; @@ -159,6 +159,9 @@ impl TypeInfo { Type::Primitive(Primitive::Prefix) => { std::mem::align_of::() as u32 } + Type::Primitive(Primitive::String) => { + std::mem::align_of::>() as u32 + } Type::BuiltIn(_, id) => { rt.get_runtime_type(id).unwrap().alignment() as u32 } diff --git a/src/typechecker/types.rs b/src/typechecker/types.rs index 7e8a1b29..7df57693 100644 --- a/src/typechecker/types.rs +++ b/src/typechecker/types.rs @@ -6,6 +6,7 @@ use crate::{ use std::{ any::TypeId, fmt::{Debug, Display}, + sync::Arc, }; #[derive(Clone, Debug, PartialEq, Eq)] @@ -185,7 +186,7 @@ impl Primitive { U32 | I32 | Asn => 4, U64 | I64 => 8, Unit => 0, - String => 4, + String => std::mem::size_of::>() as u32, IpAddr => std::mem::size_of::() as u32, Prefix => std::mem::size_of::() as u32, } From 6a47fc48bc8d9d366d8166d69c41d7a7aeb4948b Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Fri, 13 Dec 2024 16:41:00 +0100 Subject: [PATCH 2/8] add some string methods --- macros/src/lib.rs | 2 +- src/codegen/tests.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++ src/runtime/mod.rs | 27 ++++++++++++++++++----- 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 6a3db341..98cbba77 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -266,7 +266,7 @@ fn generate_function(item: syn::ItemFn) -> Intermediate { #vis extern "C" fn #ident #generics ( out: *mut #ret, #(#inputs,)* ) { #item - unsafe { *out = #ident(#(#args),*) }; + unsafe { std::ptr::write(out, #ident(#(#args),*)) }; } }; diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index 8af01a18..28d20e45 100644 --- 
a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -993,3 +993,54 @@ fn string() { let res = f.call(&mut ()); assert_eq!(res, Verdict::Accept("hello".into())); } + +#[test] +fn string_append() { + let s = src!( + r#" + filter-map main(name: String) { + apply { + accept "Hello ".append(name) + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict, ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "Martin".into()); + assert_eq!(res, Verdict::Accept("Hello Martin".into())); +} + +#[test] +fn string_contains() { + let s = src!( + r#" + filter-map main(s: String) { + apply { + if "incomprehensibilities".contains(s) { + accept + } else { + reject + } + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict<(), ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "pre".into()); + assert_eq!(res, Verdict::Accept(())); + + let res = f.call(&mut (), "post".into()); + assert_eq!(res, Verdict::Reject(())); +} diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 0d8ece59..de759822 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -713,11 +713,6 @@ impl Runtime { ", )?; - rt.register_clone_type_with_name::>( - "String", - "The string type", - )?; - /// Construct a new prefix /// /// A prefix can also be constructed with the `/` operator. 
@@ -803,6 +798,28 @@ impl Runtime { ) .unwrap(); + rt.register_clone_type_with_name::>( + "String", + "The string type", + )?; + + #[roto_method(rt, Arc)] + fn append(a: *const Arc, b: *const Arc) -> Arc { + let a = unsafe { &*a }; + let b = unsafe { &*b }; + format!("{a}{b}").into() + } + + #[roto_method(rt, Arc)] + fn contains( + haystack: *const Arc, + needle: *const Arc, + ) -> bool { + let haystack = unsafe { &*haystack }; + let needle = unsafe { &*needle }; + haystack.contains(needle.as_ref()) + } + Ok(rt) } From eddc15d99a316dcc65737c6d1cb6484122348dd0 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 16 Dec 2024 14:46:55 +0100 Subject: [PATCH 3/8] more string methods --- src/codegen/tests.rs | 109 ++++++++++++++++++++++++++++++++++++++++++- src/runtime/mod.rs | 26 +++++++++++ 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index 28d20e45..3a9b262f 100644 --- a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -1038,9 +1038,114 @@ fn string_contains() { .get_function::<(), (Arc,), Verdict<(), ()>>("main") .unwrap(); - let res = f.call(&mut (), "pre".into()); + let res = f.call(&mut (), "incompre".into()); assert_eq!(res, Verdict::Accept(())); - let res = f.call(&mut (), "post".into()); + let res = f.call(&mut (), "hensi".into()); + assert_eq!(res, Verdict::Accept(())); + + let res = f.call(&mut (), "bilities".into()); + assert_eq!(res, Verdict::Accept(())); + + let res = f.call(&mut (), "nananana".into()); assert_eq!(res, Verdict::Reject(())); } + +#[test] +fn string_starts_with() { + let s = src!( + r#" + filter-map main(s: String) { + apply { + if "incomprehensibilities".starts_with(s) { + accept + } else { + reject + } + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict<(), ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "incompre".into()); + assert_eq!(res, Verdict::Accept(())); + + let res = f.call(&mut (), "hensi".into()); + 
assert_eq!(res, Verdict::Reject(())); + + let res = f.call(&mut (), "bilities".into()); + assert_eq!(res, Verdict::Reject(())); + + let res = f.call(&mut (), "nananana".into()); + assert_eq!(res, Verdict::Reject(())); +} + +#[test] +fn string_ends_with() { + let s = src!( + r#" + filter-map main(s: String) { + apply { + if "incomprehensibilities".ends_with(s) { + accept + } else { + reject + } + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict<(), ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "incompre".into()); + assert_eq!(res, Verdict::Reject(())); + + let res = f.call(&mut (), "hensi".into()); + assert_eq!(res, Verdict::Reject(())); + + let res = f.call(&mut (), "bilities".into()); + assert_eq!(res, Verdict::Accept(())); + + let res = f.call(&mut (), "nananana".into()); + assert_eq!(res, Verdict::Reject(())); +} + +#[test] +fn string_to_lowercase_and_uppercase() { + let s = src!( + r#" + filter-map main(lower: bool, s: String) { + apply { + if lower { + accept s.to_lowercase() + } else { + accept s.to_uppercase() + } + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (bool, Arc), Verdict, ()>>("main") + .unwrap(); + + let res = f.call(&mut (), true, "WHISPER THIS!".into()); + assert_eq!(res, Verdict::Accept("whisper this!".into())); + + let res = f.call(&mut (), false, "now shout this!".into()); + assert_eq!(res, Verdict::Accept("NOW SHOUT THIS!".into())); +} diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index de759822..5d896189 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -820,6 +820,32 @@ impl Runtime { haystack.contains(needle.as_ref()) } + #[roto_method(rt, Arc)] + fn starts_with(s: *const Arc, prefix: *const Arc) -> bool { + let s = unsafe { &*s }; + let prefix = unsafe { &*prefix }; + s.starts_with(prefix.as_ref()) + } + + #[roto_method(rt, Arc)] + fn ends_with(s: *const Arc, prefix: *const Arc) -> bool { + let s = unsafe { &*s }; + let prefix = 
unsafe { &*prefix }; + s.ends_with(prefix.as_ref()) + } + + #[roto_method(rt, Arc)] + fn to_lowercase(s: *const Arc) -> Arc { + let s = unsafe { &*s }; + s.to_lowercase().into() + } + + #[roto_method(rt, Arc)] + fn to_uppercase(s: *const Arc) -> Arc { + let s = unsafe { &*s }; + s.to_uppercase().into() + } + Ok(rt) } From 72b7f11cd438a9cfaacd4be41c92574241e3d697 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Mon, 16 Dec 2024 15:48:18 +0100 Subject: [PATCH 4/8] add plus operator for strings --- src/codegen/tests.rs | 26 ++++++++++++++++++++-- src/lower/mod.rs | 48 +++++++++++++++++++++++++++++++++++++++++ src/parser/expr.rs | 8 +++---- src/typechecker/expr.rs | 34 +++++++++++++++++++++++++++++ 4 files changed, 110 insertions(+), 6 deletions(-) diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index 3a9b262f..90c3fa29 100644 --- a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -1000,7 +1000,7 @@ fn string_append() { r#" filter-map main(name: String) { apply { - accept "Hello ".append(name) + accept "Hello ".append(name).append("!") } } "# @@ -1013,7 +1013,29 @@ fn string_append() { .unwrap(); let res = f.call(&mut (), "Martin".into()); - assert_eq!(res, Verdict::Accept("Hello Martin".into())); + assert_eq!(res, Verdict::Accept("Hello Martin!".into())); +} + +#[test] +fn string_plus_operator() { + let s = src!( + r#" + filter-map main(name: String) { + apply { + accept "Hello " + name + "!" 
+ } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict, ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "Martin".into()); + assert_eq!(res, Verdict::Accept("Hello Martin!".into())); } #[test] diff --git a/src/lower/mod.rs b/src/lower/mod.rs index 14ad8ee8..8215bd9b 100644 --- a/src/lower/mod.rs +++ b/src/lower/mod.rs @@ -764,6 +764,54 @@ impl<'r> Lowerer<'r> { let place = self.new_tmp(); match (op, binop_to_cmp(op, &ty), ty) { + ( + ast::BinOp::Add, + _, + Type::Primitive(Primitive::String), + ) => { + let function = self.type_info.function(id); + let FunctionDefinition::Runtime(runtime_func) = + function.definition.clone() + else { + panic!() + }; + + let size = self.type_info.size_of( + &Type::Primitive(Primitive::String), + self.runtime, + ); + let alignment = self.type_info.alignment_of( + &Type::Primitive(Primitive::String), + self.runtime, + ); + let align_shift = alignment.ilog2() as u8; + self.add(Instruction::Alloc { + to: place.clone(), + size, + align_shift, + }); + + let ident = Identifier::from("append"); + let ir_func = IrFunction { + name: ident, + ptr: runtime_func.description.pointer(), + params: vec![ + IrType::Pointer, + IrType::Pointer, + IrType::Pointer, + ], + ret: None, + }; + + self.runtime_functions + .insert(runtime_func.id, ir_func); + + self.add(Instruction::CallRuntime { + to: None, + func: runtime_func, + args: vec![place.clone().into(), left, right], + }); + } ( ast::BinOp::Div, _, diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 241a13ee..ab00f930 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -218,14 +218,14 @@ impl Parser<'_, '_> { fn sum(&mut self, r: Restrictions) -> ParseResult> { let left = self.term(r)?; if self.next_is(Token::Plus) { - let right = self.term(r)?; + let right = self.sum(r)?; let span = self.merge_spans(&left, &right); Ok(self.spans.add( span, Expr::BinOp(Box::new(left), BinOp::Add, Box::new(right)), )) } else if 
self.next_is(Token::Hyphen) { - let right = self.term(r)?; + let right = self.sum(r)?; let span = self.merge_spans(&left, &right); Ok(self.spans.add( span, @@ -239,14 +239,14 @@ impl Parser<'_, '_> { fn term(&mut self, r: Restrictions) -> ParseResult> { let left = self.negation(r)?; if self.next_is(Token::Star) { - let right = self.negation(r)?; + let right = self.term(r)?; let span = self.merge_spans(&left, &right); Ok(self.spans.add( span, Expr::BinOp(Box::new(left), BinOp::Mul, Box::new(right)), )) } else if self.next_is(Token::Slash) { - let right = self.negation(r)?; + let right = self.term(r)?; let span = self.merge_spans(&left, &right); Ok(self.spans.add( span, diff --git a/src/typechecker/expr.rs b/src/typechecker/expr.rs index da7ddea7..f1a51993 100644 --- a/src/typechecker/expr.rs +++ b/src/typechecker/expr.rs @@ -679,6 +679,40 @@ impl TypeChecker<'_> { } }; + if let Add = op { + let var = self.fresh_var(); + let ctx_new = ctx.with_type(var.clone()); + + let mut diverges = false; + diverges |= self.expr(scope, &ctx_new, left)?; + + let resolved = self.resolve_type(&var); + + if let Type::Primitive(Primitive::String) = resolved { + diverges |= self.expr(scope, &ctx_new, right)?; + + self.unify( + &ctx.expected_type, + &Type::Primitive(Primitive::String), + span, + None, + )?; + + let name = Identifier::from("append"); + let (function, _sig) = self + .find_function( + &FunctionKind::Method(Type::Primitive( + Primitive::String, + )), + name, + ) + .unwrap(); + let function = function.clone(); + self.type_info.function_calls.insert(span, function); + return Ok(diverges); + } + } + match op { And | Or => { self.unify( From 855fe820bfbe6c9c4531e6435a088d2d6e267891 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 18 Dec 2024 11:23:15 +0100 Subject: [PATCH 5/8] improve documentation of strings, constants and context --- macros/src/lib.rs | 28 +++++++++------ src/codegen/tests.rs | 25 +++++++++++++ src/runtime/context.rs | 1 + src/runtime/mod.rs | 80 
++++++++++++++++++++++++++++++++++++++---- 4 files changed, 117 insertions(+), 17 deletions(-) diff --git a/macros/src/lib.rs b/macros/src/lib.rs index 98cbba77..24866278 100644 --- a/macros/src/lib.rs +++ b/macros/src/lib.rs @@ -29,13 +29,15 @@ pub fn roto_context(item: TokenStream) -> TokenStream { let offset = quote!(std::mem::offset_of!(Self, #field_name)); let type_name = quote!(std::any::type_name::<#field_ty>()); let type_id = quote!(std::any::TypeId::of::<#field_ty>()); + let docstring = gather_docstring(&f.attrs); quote!( roto::ContextField { name: stringify!(#field_name), offset: #offset, type_name: #type_name, - type_id: #type_id + type_id: #type_id, + docstring: String::from(#docstring), } ) }) @@ -191,17 +193,10 @@ pub fn roto_static_method( TokenStream::from(expanded) } -fn generate_function(item: syn::ItemFn) -> Intermediate { - let syn::ItemFn { - attrs, - vis, - sig, - block: _, - } = item.clone(); - +fn gather_docstring(attrs: &[syn::Attribute]) -> String { let mut docstring = String::new(); - for attr in &attrs { + for attr in attrs { if attr.path().is_ident("doc") { let value = match &attr.meta { syn::Meta::NameValue(name_value) => &name_value.value, @@ -224,6 +219,19 @@ fn generate_function(item: syn::ItemFn) -> Intermediate { } } + docstring +} + +fn generate_function(item: syn::ItemFn) -> Intermediate { + let syn::ItemFn { + attrs, + vis, + sig, + block: _, + } = item.clone(); + + let docstring = gather_docstring(&attrs); + assert!(sig.unsafety.is_none()); assert!(sig.variadic.is_none()); diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index 90c3fa29..6f247bf7 100644 --- a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -1171,3 +1171,28 @@ fn string_to_lowercase_and_uppercase() { let res = f.call(&mut (), false, "now shout this!".into()); assert_eq!(res, Verdict::Accept("NOW SHOUT THIS!".into())); } + +#[test] +fn string_repeat() { + let s = src!( + r#" + filter-map main(s: String) { + define { + exclamation = (s + 
"!").to_uppercase(); + } + apply { + accept (exclamation + " ").repeat(4) + exclamation + } + } + "# + ); + + let mut p = compile(s); + + let f = p + .get_function::<(), (Arc,), Verdict, ()>>("main") + .unwrap(); + + let res = f.call(&mut (), "boo".into()); + assert_eq!(res, Verdict::Accept("BOO! BOO! BOO! BOO! BOO!".into())); +} diff --git a/src/runtime/context.rs b/src/runtime/context.rs index 6c5d3fab..591e1d30 100644 --- a/src/runtime/context.rs +++ b/src/runtime/context.rs @@ -30,6 +30,7 @@ pub struct ContextField { pub offset: usize, pub type_name: &'static str, pub type_id: TypeId, + pub docstring: String, } impl Context for () { diff --git a/src/runtime/mod.rs b/src/runtime/mod.rs index 5d896189..8fd17d26 100644 --- a/src/runtime/mod.rs +++ b/src/runtime/mod.rs @@ -574,6 +574,31 @@ impl Runtime { self.print_function(f); } + if let Some(ContextDescription { + type_id: _, + type_name: _, + fields, + }) = &self.context + { + for crate::ContextField { + name, + offset: _, + type_name: _, + type_id, + docstring, + } in fields + { + println!( + "`````{{roto:context}} {name}: {}", + self.print_ty(*type_id) + ); + for line in docstring.lines() { + println!("{line}"); + } + println!("`````\n"); + } + } + for RuntimeConstant { name, ty, @@ -581,10 +606,7 @@ impl Runtime { .. 
} in self.constants.values() { - println!( - "`````{{roto::constant}} {name}: {}", - self.print_ty(*ty) - ); + println!("`````{{roto:constant}} {name}: {}", self.print_ty(*ty)); for line in docstring.lines() { println!("{line}"); } @@ -803,6 +825,11 @@ impl Runtime { "The string type", )?; + /// Append a string to another, creating a new string + /// + /// ```roto + /// "hello".append(" ").append("world") # -> "hello world" + /// ``` #[roto_method(rt, Arc)] fn append(a: *const Arc, b: *const Arc) -> Arc { let a = unsafe { &*a }; @@ -810,6 +837,12 @@ impl Runtime { format!("{a}{b}").into() } + /// Check whether a string contains another string + /// + /// ```roto + /// "haystack".contains("hay") # -> true + /// "haystack".contains("corn") # -> false + /// ``` #[roto_method(rt, Arc)] fn contains( haystack: *const Arc, @@ -820,6 +853,12 @@ impl Runtime { haystack.contains(needle.as_ref()) } + /// Check whether a string starts with a given prefix + /// + /// ```roto + /// "haystack".starts_with("hay") # -> true + /// "haystack".starts_with("trees") # -> false + /// ``` #[roto_method(rt, Arc)] fn starts_with(s: *const Arc, prefix: *const Arc) -> bool { let s = unsafe { &*s }; @@ -827,25 +866,52 @@ impl Runtime { s.starts_with(prefix.as_ref()) } + /// Check whether a string ends with a given suffix + /// + /// ```roto + /// "haystack".ends_with("stack") # -> true + /// "haystack".ends_with("black") # -> false + /// ``` #[roto_method(rt, Arc)] - fn ends_with(s: *const Arc, prefix: *const Arc) -> bool { + fn ends_with(s: *const Arc, suffix: *const Arc) -> bool { let s = unsafe { &*s }; - let prefix = unsafe { &*prefix }; - s.ends_with(prefix.as_ref()) + let suffix = unsafe { &*suffix }; + s.ends_with(suffix.as_ref()) } + /// Create a new string with all characters converted to lowercase + /// + /// ```roto + /// "LOUD".to_lowercase() # -> "loud" + /// ``` #[roto_method(rt, Arc)] fn to_lowercase(s: *const Arc) -> Arc { let s = unsafe { &*s }; s.to_lowercase().into() } + /// Create a
new string with all characters converted to uppercase + /// + /// ```roto + /// "quiet".to_uppercase() # -> "QUIET" + /// ``` #[roto_method(rt, Arc)] fn to_uppercase(s: *const Arc) -> Arc { let s = unsafe { &*s }; s.to_uppercase().into() } + /// Repeat a string `n` times and join them + /// + /// ```roto + /// "ha".repeat(6) # -> "hahahahahaha" + /// ``` + #[roto_method(rt, Arc)] + fn repeat(s: *const Arc, n: u32) -> Arc { + let s = unsafe { &*s }; + s.repeat(n as usize).into() + } + Ok(rt) } From 459a44bb707f8e48e31d492db19887b3f957aad7 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Wed, 18 Dec 2024 11:28:17 +0100 Subject: [PATCH 6/8] add context to simple example so we can test context in the docs --- examples/simple.rs | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/examples/simple.rs b/examples/simple.rs index 7528973f..7e39c0e9 100644 --- a/examples/simple.rs +++ b/examples/simple.rs @@ -1,11 +1,20 @@ use std::{env::args, net::IpAddr}; -use roto::{read_files, Runtime, Verdict}; +use roto::{read_files, Context, Runtime, Verdict}; fn main() -> Result<(), roto::RotoReport> { env_logger::init(); - let runtime = Runtime::basic().unwrap(); + let mut runtime = Runtime::basic().unwrap(); + + // Adding a context is not necessary but done here for testing purposes + #[derive(Context)] + struct Ctx { + /// This is the foo u32 + pub foo: u32, + } + + runtime.register_context_type::().unwrap(); let mut arguments = args(); let _program_name = arguments.next().unwrap(); From ba83f809748b36b29a2d04f4f13fcaa26a60f3f4 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Thu, 19 Dec 2024 20:15:28 +0100 Subject: [PATCH 7/8] fix some missing support for 64 bit numbers --- src/lower/mod.rs | 4 ++++ src/typechecker/mod.rs | 8 ++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/lower/mod.rs b/src/lower/mod.rs index 8215bd9b..106b4504 100644 --- a/src/lower/mod.rs +++ b/src/lower/mod.rs @@ -1148,6 +1148,9 @@ impl<'r>
Lowerer<'r> { Type::Primitive(Primitive::U32) => { IrValue::U32(*x as u32) } + Type::Primitive(Primitive::U64) => { + IrValue::U64(*x as u64) + } Type::Primitive(Primitive::I8) => IrValue::I8(*x as i8), Type::Primitive(Primitive::I16) => { IrValue::I16(*x as i16) @@ -1155,6 +1158,7 @@ impl<'r> Lowerer<'r> { Type::Primitive(Primitive::I32) => { IrValue::I32(*x as i32) } + Type::Primitive(Primitive::I64) => IrValue::I64(*x), Type::IntVar(_) => IrValue::I32(*x as i32), _ => ice!("should be a type error"), } diff --git a/src/typechecker/mod.rs b/src/typechecker/mod.rs index 8b45d026..6f0073db 100644 --- a/src/typechecker/mod.rs +++ b/src/typechecker/mod.rs @@ -496,12 +496,16 @@ impl TypeChecker<'_> { (Never, x) | (x, Never) => x, ( IntVar(a), - b @ (Primitive(U8 | U16 | U32 | I8 | I16 | I32) | IntVar(_)), + b @ (Primitive(U8 | U16 | U32 | U64 | I8 | I16 | I32 | I64) + | IntVar(_)), ) => { self.type_info.unionfind.set(a, b.clone()); b.clone() } - (a @ Primitive(U8 | U16 | U32 | I8 | I16 | I32), IntVar(b)) => { + ( + a @ Primitive(U8 | U16 | U32 | U64 | I8 | I16 | I32 | I64), + IntVar(b), + ) => { self.type_info.unionfind.set(b, a.clone()); a.clone() } From 670347efa5ba6c04d5f6e1f8571cdb00374617e3 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 21 Jan 2025 10:23:55 +0100 Subject: [PATCH 8/8] adapt strings to let bindings --- src/codegen/tests.rs | 60 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 39 deletions(-) diff --git a/src/codegen/tests.rs b/src/codegen/tests.rs index acd24948..2121bc06 100644 --- a/src/codegen/tests.rs +++ b/src/codegen/tests.rs @@ -910,9 +910,7 @@ fn string() { let s = src!( r#" filter-map main() { - apply { - accept "hello" - } + accept "hello" } "# ); @@ -932,9 +930,7 @@ fn string_append() { let s = src!( r#" filter-map main(name: String) { - apply { - accept "Hello ".append(name).append("!") - } + accept "Hello ".append(name).append("!") } "# ); @@ -954,9 +950,7 @@ fn string_plus_operator() { let s = 
src!( r#" filter-map main(name: String) { - apply { - accept "Hello " + name + "!" - } + accept "Hello " + name + "!" } "# ); @@ -976,12 +970,10 @@ fn string_contains() { let s = src!( r#" filter-map main(s: String) { - apply { - if "incomprehensibilities".contains(s) { - accept - } else { - reject - } + if "incomprehensibilities".contains(s) { + accept + } else { + reject } } "# @@ -1011,12 +1003,10 @@ fn string_starts_with() { let s = src!( r#" filter-map main(s: String) { - apply { - if "incomprehensibilities".starts_with(s) { - accept - } else { - reject - } + if "incomprehensibilities".starts_with(s) { + accept + } else { + reject } } "# @@ -1046,12 +1036,10 @@ fn string_ends_with() { let s = src!( r#" filter-map main(s: String) { - apply { - if "incomprehensibilities".ends_with(s) { - accept - } else { - reject - } + if "incomprehensibilities".ends_with(s) { + accept + } else { + reject } } "# @@ -1081,12 +1069,10 @@ fn string_to_lowercase_and_uppercase() { let s = src!( r#" filter-map main(lower: bool, s: String) { - apply { - if lower { - accept s.to_lowercase() - } else { - accept s.to_uppercase() - } + if lower { + accept s.to_lowercase() + } else { + accept s.to_uppercase() } } "# @@ -1110,12 +1096,8 @@ fn string_repeat() { let s = src!( r#" filter-map main(s: String) { - define { - exclamation = (s + "!").to_uppercase(); - } - apply { - accept (exclamation + " ").repeat(4) + exclamation - } + let exclamation = (s + "!").to_uppercase(); + accept (exclamation + " ").repeat(4) + exclamation } "# );