From 05da49b048be0b96e1cbd43b41fb8282ed1eef24 Mon Sep 17 00:00:00 2001 From: faldor20 Date: Thu, 5 Dec 2024 11:52:28 +1000 Subject: [PATCH 1/6] initial experiment with file reading --- crates/roc_host/src/lib.rs | 41 +++++++++++++++++++++++++++++++++++++- platform/File.roc | 6 ++++++ platform/PlatformTasks.roc | 2 ++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index 91bbf4cf..af7f7824 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -6,12 +6,15 @@ #![allow(non_snake_case)] #![allow(improper_ctypes)] use core::ffi::c_void; -use roc_std::{RocBox, RocList, RocResult, RocStr}; +use hyper::body::Buf; +use roc_std::{RocBox, RocList, RocRefcounted, RocResult, RocStr}; use roc_std_heap::ThreadSafeRefcountedResourceHeap; use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; +use std::fmt::Debug; use std::fs::File; use std::io::{BufRead, BufReader, ErrorKind, Read, Write}; +use std::mem::ManuallyDrop; use std::net::TcpStream; use std::path::Path; use std::sync::OnceLock; @@ -301,6 +304,7 @@ pub fn init() { roc_fx_fileReadBytes as _, roc_fx_fileReader as _, roc_fx_fileReadLine as _, + roc_fx_fileReadByteBuf as _, roc_fx_fileDelete as _, roc_fx_cwd as _, roc_fx_posixTime as _, @@ -624,6 +628,41 @@ pub extern "C" fn roc_fx_fileReadLine(data: RocBox<()>) -> RocResult } } +#[no_mangle] +pub extern "C" fn roc_fx_fileReadByteBuf( + data: RocBox<()>, + buf: &mut RocList, +) -> RocResult, RocStr> { + let buf_reader: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); + + loop { + let available = match buf_reader.fill_buf() { + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; + // We should be able to ask the user to "return" their buffer. So that if they do they get the same buffer back and we don't have to re-allocate. Should be a nice optimization. + // TODO: If the capacity is larger but the len isn't right we should be able to extend the len to match. I don't have access to a function that does that though + if buf.is_unique() || !buf.is_readonly() { + if buf.capacity() >= available.len() { + unsafe { + // We inc the refence count because roc will think after we return the reference count should be zero. + // buf.inc(); + let roc_list = + RocList::from_raw_parts(buf.as_mut_ptr(), buf.len(), buf.capacity()); + let len = available.len(); + buf_reader.consume(len); + return RocResult::ok(roc_list); + } + } + } + let list = RocResult::ok(RocList::from_slice(available)); + let len = available.len(); + buf_reader.consume(len); + return list; + } +} + fn read_until( r: &mut R, delim: u8, diff --git a/platform/File.roc b/platform/File.roc index 32fb5e24..6eb56749 100644 --- a/platform/File.roc +++ b/platform/File.roc @@ -16,6 +16,7 @@ module [ openReader!, openReaderWithCapacity!, readLine!, + readBytesBuf!, ] import Path exposing [Path, MetadataErr] @@ -237,3 +238,8 @@ readLine! : Reader => Result (List U8) [FileReadErr Path Str] readLine! = \@Reader { reader, path } -> PlatformTasks.fileReadLine! reader |> Result.mapErr \err -> FileReadErr path err + +readBytesBuf! : Reader,List U8 => Result (List U8) [FileReadErr Path Str] +readBytesBuf! = \@Reader { reader, path },buf -> + PlatformTasks.fileReadByteBuf! reader buf + |> Result.mapErr \err -> FileReadErr path err diff --git a/platform/PlatformTasks.roc b/platform/PlatformTasks.roc index 8a03b3ea..9aeed0c5 100644 --- a/platform/PlatformTasks.roc +++ b/platform/PlatformTasks.roc @@ -30,6 +30,7 @@ hosted PlatformTasks fileWriteBytes!, fileReader!, fileReadLine!, + fileReadByteBuf!, pathType!, posixTime!, tcpConnect!, @@ -85,6 +86,7 @@ fileReadBytes! : List U8 => Result (List U8) Str FileReader := Box {} fileReader! : List U8, U64 => Result FileReader Str fileReadLine! : FileReader => Result (List U8) Str +fileReadByteBuf! : FileReader, List U8 => Result (List U8) Str envDict! : {} => List (Str, Str) envVar! : Str => Result Str {} From 24459f3ddccdf96c8ea5cb1ac312b30585d6c59d Mon Sep 17 00:00:00 2001 From: faldor20 Date: Thu, 5 Dec 2024 11:52:47 +1000 Subject: [PATCH 2/6] test using roc slice as buf --- crates/roc_host/src/lib.rs | 105 ++++++++++++++++++++++++++++--------- platform/File.roc | 11 ++++ platform/PlatformTasks.roc | 2 + 3 files changed, 93 insertions(+), 25 deletions(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index af7f7824..cd583f10 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -13,7 +13,7 @@ use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; use std::fmt::Debug; use std::fs::File; -use std::io::{BufRead, BufReader, ErrorKind, Read, Write}; +use std::io::{BufRead, BufReader, ErrorKind, Read, Seek, Write}; use std::mem::ManuallyDrop; use std::net::TcpStream; use std::path::Path; @@ -43,6 +43,17 @@ fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap> { .expect("Failed to allocate mmap for file handle references.") }) } +fn reader_heap() -> &'static ThreadSafeRefcountedResourceHeap> { + static FILE_HEAP: OnceLock>> = OnceLock::new(); + FILE_HEAP.get_or_init(|| { + let DEFAULT_MAX_FILES = 65536; + let max_files = env::var("ROC_BASIC_CLI_MAX_FILES") + .map(|v| v.parse().unwrap_or(DEFAULT_MAX_FILES)) + .unwrap_or(DEFAULT_MAX_FILES); + ThreadSafeRefcountedResourceHeap::new(max_files) + .expect("Failed to allocate mmap for file handle references.") + }) +} fn tcp_heap() -> &'static ThreadSafeRefcountedResourceHeap> { // TODO: Should this be a BufReader and BufWriter of the tcp stream? @@ -303,6 +314,7 @@ pub fn init() { roc_fx_pathType as _, roc_fx_fileReadBytes as _, roc_fx_fileReader as _, + roc_fx_fileReaderRocBuf as _, roc_fx_fileReadLine as _, roc_fx_fileReadByteBuf as _, roc_fx_fileDelete as _, @@ -613,6 +625,35 @@ pub extern "C" fn roc_fx_fileReader( Err(err) => RocResult::err(toRocReadError(err)), } } +#[repr(C)] +pub struct RocReader { + internalList: RocList, + reader: R, +} + +#[no_mangle] +pub extern "C" fn roc_fx_fileReaderRocBuf( + roc_path: &RocList, + buf: &mut RocList, +) -> RocResult, RocStr> { + match File::open(path_from_roc_path(roc_path)) { + Ok(file) => unsafe { + let internalList = RocList::from_raw_parts(buf.as_mut_ptr(), buf.len(), buf.capacity()); + let roc_reader = RocReader { + reader: file, + internalList, + }; + let heap = reader_heap(); + buf.inc(); + let alloc_result = heap.alloc_for(roc_reader); + match alloc_result { + Ok(out) => RocResult::ok(out), + Err(err) => RocResult::err(toRocReadError(err)), + } + }, + Err(err) => RocResult::err(toRocReadError(err)), + } +} #[no_mangle] pub extern "C" fn roc_fx_fileReadLine(data: RocBox<()>) -> RocResult, RocStr> { @@ -628,38 +669,52 @@ pub extern "C" fn roc_fx_fileReadLine(data: RocBox<()>) -> RocResult } } +// We should be able to ask the user to "return" their buffer. So that if they do they get the same buffer back and we don't have to re-allocate. Should be a nice optimization. +// TODO: If the capacity is larger but the len isn't right we should be able to extend the len to match. I don't have access to a function that does that though #[no_mangle] pub extern "C" fn roc_fx_fileReadByteBuf( data: RocBox<()>, buf: &mut RocList, ) -> RocResult, RocStr> { - let buf_reader: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); + let buf_reader: &mut RocReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); - loop { - let available = match buf_reader.fill_buf() { - Ok(n) => n, - Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, - Err(e) => return RocResult::err(e.to_string().as_str().into()), - }; - // We should be able to ask the user to "return" their buffer. So that if they do they get the same buffer back and we don't have to re-allocate. Should be a nice optimization. - // TODO: If the capacity is larger but the len isn't right we should be able to extend the len to match. I don't have access to a function that does that though - if buf.is_unique() || !buf.is_readonly() { - if buf.capacity() >= available.len() { - unsafe { - // We inc the refence count because roc will think after we return the reference count should be zero. - // buf.inc(); - let roc_list = - RocList::from_raw_parts(buf.as_mut_ptr(), buf.len(), buf.capacity()); - let len = available.len(); - buf_reader.consume(len); - return RocResult::ok(roc_list); - } + let canUseInternal = + buf_reader.internalList.is_unique() || !buf_reader.internalList.is_readonly(); + + if canUseInternal { + let buf_slice = buf_reader.internalList.as_mut_slice(); + loop { + let read = match buf_reader.reader.read(buf_slice) { + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; + unsafe { + // We inc the refence count because roc will think after we return the reference count should be zero. + // buf.inc(); + + let roc_list = RocList::from_raw_parts( + buf_reader.internalList.as_mut_ptr(), + read, + buf_reader.internalList.capacity(), + ); + return RocResult::ok(roc_list); + } + } + } else { + let mut list = RocList::with_capacity(buf_reader.internalList.capacity()); + + loop { + unsafe { + let read = match buf_reader.reader.read(list.as_mut_slice()) { + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; + let roc_list = RocList::from_raw_parts(list.as_mut_ptr(), read, list.capacity()); + return RocResult::ok(roc_list); } } - let list = RocResult::ok(RocList::from_slice(available)); - let len = available.len(); - buf_reader.consume(len); - return list; } } diff --git a/platform/File.roc b/platform/File.roc index 6eb56749..2a149d21 100644 --- a/platform/File.roc +++ b/platform/File.roc @@ -15,6 +15,7 @@ module [ Reader, openReader!, openReaderWithCapacity!, + openReaderWithBuf!, readLine!, readBytesBuf!, ] @@ -243,3 +244,13 @@ readBytesBuf! : Reader,List U8 => Result (List U8) [FileReadErr Path Str] readBytesBuf! = \@Reader { reader, path },buf -> PlatformTasks.fileReadByteBuf! reader buf |> Result.mapErr \err -> FileReadErr path err + +# TODO! This returns a FIle but isn't actually the same as other readers so it would break if i use it in another reader +openReaderWithBuf! : Str, List U8 => Result Reader [GetFileReadErr Path ReadErr] +openReaderWithBuf! = \pathStr, capacity -> + path = Path.fromStr pathStr + + PlatformTasks.fileReaderRocBuf! (Str.toUtf8 pathStr) capacity + |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) + |> Result.map \reader -> @Reader { reader, path } + diff --git a/platform/PlatformTasks.roc b/platform/PlatformTasks.roc index 9aeed0c5..7493b2a4 100644 --- a/platform/PlatformTasks.roc +++ b/platform/PlatformTasks.roc @@ -29,6 +29,7 @@ hosted PlatformTasks fileWriteUtf8!, fileWriteBytes!, fileReader!, + fileReaderRocBuf!, fileReadLine!, fileReadByteBuf!, pathType!, @@ -85,6 +86,7 @@ fileReadBytes! : List U8 => Result (List U8) Str FileReader := Box {} fileReader! : List U8, U64 => Result FileReader Str +fileReaderRocBuf! : List U8,List U8=> Result FileReader Str fileReadLine! : FileReader => Result (List U8) Str fileReadByteBuf! : FileReader, List U8 => Result (List U8) Str From 1c6fa6c1e8ea308877a1712cdd837058989793e6 Mon Sep 17 00:00:00 2001 From: faldor20 Date: Thu, 5 Dec 2024 14:41:55 +1000 Subject: [PATCH 3/6] configurable buf reader to either overwrite or make a new slice --- crates/roc_host/src/lib.rs | 66 +++++++++++++++++++++++++++----------- 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index cd583f10..165204c9 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -13,7 +13,7 @@ use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; use std::fmt::Debug; use std::fs::File; -use std::io::{BufRead, BufReader, ErrorKind, Read, Seek, Write}; +use std::io::{BufRead, BufReader, ErrorKind, IsTerminal, Read, Seek, Write}; use std::mem::ManuallyDrop; use std::net::TcpStream; use std::path::Path; @@ -631,6 +631,15 @@ pub struct RocReader { reader: R, } +// impl Drop for RocReader +// where +// T: ?Sized, +// { +// fn drop(&mut self) { +// self.internalList.dec() +// } +// } + #[no_mangle] pub extern "C" fn roc_fx_fileReaderRocBuf( roc_path: &RocList, @@ -676,23 +685,32 @@ pub extern "C" fn roc_fx_fileReadByteBuf( data: RocBox<()>, buf: &mut RocList, ) -> RocResult, RocStr> { + let mut data = data; + //If I don't do this roc just silently crashes after the file goes out of scope. I suspect that's because it's de-allocating the list inside? + data.inc(); let buf_reader: &mut RocReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); let canUseInternal = - buf_reader.internalList.is_unique() || !buf_reader.internalList.is_readonly(); + // buf_reader.internalList.is_unique() || !buf_reader.internalList.is_readonly(); + //If the given list is the same as the internal buff re-use otherwise don't re-use + buf_reader.internalList.as_ptr()== buf.as_ptr(); if canUseInternal { - let buf_slice = buf_reader.internalList.as_mut_slice(); - loop { - let read = match buf_reader.reader.read(buf_slice) { - Ok(n) => n, - Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, - Err(e) => return RocResult::err(e.to_string().as_str().into()), - }; - unsafe { - // We inc the refence count because roc will think after we return the reference count should be zero. - // buf.inc(); - + // We inc the refence count because roc will think after we return the reference count should be zero. + buf_reader.internalList.inc(); + unsafe { + //This ensures we always expand the buffer to the full capacity of the list + let buf_slice: &mut [u8] = std::slice::from_raw_parts_mut( + buf_reader.internalList.as_mut_ptr(), + buf_reader.internalList.capacity(), + ); + loop { + let read = match buf_reader.reader.read(buf_slice) { + // Ok(n) if n < buf_len => return RocResult::err("no more bytes".into()), + Ok(n) => n, + Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, + Err(e) => return RocResult::err(e.to_string().as_str().into()), + }; let roc_list = RocList::from_raw_parts( buf_reader.internalList.as_mut_ptr(), read, @@ -702,16 +720,26 @@ pub extern "C" fn roc_fx_fileReadByteBuf( } } } else { - let mut list = RocList::with_capacity(buf_reader.internalList.capacity()); - - loop { - unsafe { - let read = match buf_reader.reader.read(list.as_mut_slice()) { + unsafe { + //Make a new list + let mut list = RocList::with_capacity(buf_reader.internalList.capacity()); + list.inc(); + //get a slice to the full memmory of the list + let slice: &mut [u8] = + std::slice::from_raw_parts_mut(list.as_mut_ptr(), list.capacity()); + buf_reader.internalList = list; + loop { + let read = match buf_reader.reader.read(slice) { Ok(n) => n, Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, Err(e) => return RocResult::err(e.to_string().as_str().into()), }; - let roc_list = RocList::from_raw_parts(list.as_mut_ptr(), read, list.capacity()); + //update the length based on amount read + let roc_list = RocList::from_raw_parts( + buf_reader.internalList.as_mut_ptr(), + read, + buf_reader.internalList.capacity(), + ); return RocResult::ok(roc_list); } } From 0e337745ec2cdfc85495d6612862908139418b52 Mon Sep 17 00:00:00 2001 From: faldor20 Date: Fri, 6 Dec 2024 14:14:27 +1000 Subject: [PATCH 4/6] add fast whole file read --- crates/roc_host/src/lib.rs | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index 165204c9..6fdf876c 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -587,17 +587,36 @@ fn path_from_roc_path(bytes: &RocList) -> Cow<'_, Path> { Cow::Owned(std::path::PathBuf::from(os_string)) } +#[no_mangle] #[no_mangle] pub extern "C" fn roc_fx_fileReadBytes(roc_path: &RocList) -> RocResult, RocStr> { - // TODO: write our own duplicate of `read_to_end` that directly fills a `RocList`. - // This adds an extra O(n) copy. - let mut bytes = Vec::new(); - match File::open(path_from_roc_path(roc_path)) { - Ok(mut file) => match file.read_to_end(&mut bytes) { - Ok(_bytes_read) => RocResult::ok(RocList::from(bytes.as_slice())), - Err(err) => RocResult::err(toRocReadError(err)), - }, + Ok(mut file) => { + let size = file + .metadata() + .map(|m| m.len()) + .expect("TODO: make robust: file has not size?"); + let mut buf_list = RocList::with_capacity(size as usize); + let buf_slice: &mut [u8] = unsafe { + std::slice::from_raw_parts_mut(buf_list.as_mut_ptr(), buf_list.capacity()) + }; + + match file.read_exact(buf_slice) { + Ok(()) => { + let out_list = unsafe { + RocList::from_raw_parts( + buf_list.as_mut_ptr(), + buf_list.capacity(), + buf_list.capacity(), + ) + }; + std::mem::forget(buf_list); + + RocResult::ok(out_list) + } + Err(err) => RocResult::err(toRocReadError(err)), + } + } Err(err) => RocResult::err(toRocReadError(err)), } } From 9fcf1038846033e32eefb8b1b3c4daf20a38cb7a Mon Sep 17 00:00:00 2001 From: faldor20 Date: Sat, 7 Dec 2024 15:43:59 +1000 Subject: [PATCH 5/6] made all file read effects use a roc buffer instead of rust buffered read --- crates/roc_host/src/lib.rs | 181 +++++++++++++------------------------ platform/File.roc | 68 +++++++++----- platform/PlatformTasks.roc | 8 +- 3 files changed, 112 insertions(+), 145 deletions(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index 6fdf876c..fe35b519 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -6,6 +6,7 @@ #![allow(non_snake_case)] #![allow(improper_ctypes)] use core::ffi::c_void; +use core::panic; use hyper::body::Buf; use roc_std::{RocBox, RocList, RocRefcounted, RocResult, RocStr}; use roc_std_heap::ThreadSafeRefcountedResourceHeap; @@ -32,19 +33,8 @@ thread_local! { .unwrap(); } -fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap> { - static FILE_HEAP: OnceLock>> = OnceLock::new(); - FILE_HEAP.get_or_init(|| { - let DEFAULT_MAX_FILES = 65536; - let max_files = env::var("ROC_BASIC_CLI_MAX_FILES") - .map(|v| v.parse().unwrap_or(DEFAULT_MAX_FILES)) - .unwrap_or(DEFAULT_MAX_FILES); - ThreadSafeRefcountedResourceHeap::new(max_files) - .expect("Failed to allocate mmap for file handle references.") - }) -} -fn reader_heap() -> &'static ThreadSafeRefcountedResourceHeap> { - static FILE_HEAP: OnceLock>> = OnceLock::new(); +fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap { + static FILE_HEAP: OnceLock> = OnceLock::new(); FILE_HEAP.get_or_init(|| { let DEFAULT_MAX_FILES = 65536; let max_files = env::var("ROC_BASIC_CLI_MAX_FILES") @@ -314,7 +304,6 @@ pub fn init() { roc_fx_pathType as _, roc_fx_fileReadBytes as _, roc_fx_fileReader as _, - roc_fx_fileReaderRocBuf as _, roc_fx_fileReadLine as _, roc_fx_fileReadByteBuf as _, roc_fx_fileDelete as _, @@ -587,7 +576,6 @@ fn path_from_roc_path(bytes: &RocList) -> Cow<'_, Path> { Cow::Owned(std::path::PathBuf::from(os_string)) } -#[no_mangle] #[no_mangle] pub extern "C" fn roc_fx_fileReadBytes(roc_path: &RocList) -> RocResult, RocStr> { match File::open(path_from_roc_path(roc_path)) { @@ -622,20 +610,11 @@ pub extern "C" fn roc_fx_fileReadBytes(roc_path: &RocList) -> RocResult, - size: u64, -) -> RocResult, RocStr> { +pub extern "C" fn roc_fx_fileReader(roc_path: &RocList) -> RocResult, RocStr> { match File::open(path_from_roc_path(roc_path)) { Ok(file) => { - let buf_reader = if size > 0 { - BufReader::with_capacity(size as usize, file) - } else { - BufReader::new(file) - }; - let heap = file_heap(); - let alloc_result = heap.alloc_for(buf_reader); + let alloc_result = heap.alloc_for(file); match alloc_result { Ok(out) => RocResult::ok(out), Err(err) => RocResult::err(toRocReadError(err)), @@ -644,155 +623,121 @@ pub extern "C" fn roc_fx_fileReader( Err(err) => RocResult::err(toRocReadError(err)), } } -#[repr(C)] -pub struct RocReader { - internalList: RocList, - reader: R, -} - -// impl Drop for RocReader -// where -// T: ?Sized, -// { -// fn drop(&mut self) { -// self.internalList.dec() -// } -// } - -#[no_mangle] -pub extern "C" fn roc_fx_fileReaderRocBuf( - roc_path: &RocList, - buf: &mut RocList, -) -> RocResult, RocStr> { - match File::open(path_from_roc_path(roc_path)) { - Ok(file) => unsafe { - let internalList = RocList::from_raw_parts(buf.as_mut_ptr(), buf.len(), buf.capacity()); - let roc_reader = RocReader { - reader: file, - internalList, - }; - let heap = reader_heap(); - buf.inc(); - let alloc_result = heap.alloc_for(roc_reader); - match alloc_result { - Ok(out) => RocResult::ok(out), - Err(err) => RocResult::err(toRocReadError(err)), - } - }, - Err(err) => RocResult::err(toRocReadError(err)), - } -} #[no_mangle] -pub extern "C" fn roc_fx_fileReadLine(data: RocBox<()>) -> RocResult, RocStr> { - let buf_reader: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); +pub extern "C" fn roc_fx_fileReadLine( + data: RocBox<()>, + //TODO: this would allow the internal buffer to get much much bigger, is this acceptable? SHould we maybe include a warning about that + buffer: RocList, +) -> RocResult, RocStr> { + let file: &mut File = ThreadSafeRefcountedResourceHeap::box_to_resource(data); - let mut buffer = RocList::empty(); - match read_until(buf_reader, b'\n', &mut buffer) { - Ok(..) => { + let buffer = if buffer.is_unique() { + buffer + } else { + RocList::with_capacity(8000) + }; + match read_until(file, b'\n', buffer) { + Ok(mut buffer) => { + buffer.inc(); // Note: this returns an empty list when no bytes were read, e.g. End Of File RocResult::ok(buffer) } Err(err) => RocResult::err(err.to_string().as_str().into()), } } - // We should be able to ask the user to "return" their buffer. So that if they do they get the same buffer back and we don't have to re-allocate. Should be a nice optimization. // TODO: If the capacity is larger but the len isn't right we should be able to extend the len to match. I don't have access to a function that does that though #[no_mangle] pub extern "C" fn roc_fx_fileReadByteBuf( - data: RocBox<()>, + reader: RocBox<()>, buf: &mut RocList, ) -> RocResult, RocStr> { - let mut data = data; - //If I don't do this roc just silently crashes after the file goes out of scope. I suspect that's because it's de-allocating the list inside? - data.inc(); - let buf_reader: &mut RocReader = ThreadSafeRefcountedResourceHeap::box_to_resource(data); + let file: &mut File = ThreadSafeRefcountedResourceHeap::box_to_resource(reader); - let canUseInternal = - // buf_reader.internalList.is_unique() || !buf_reader.internalList.is_readonly(); - //If the given list is the same as the internal buff re-use otherwise don't re-use - buf_reader.internalList.as_ptr()== buf.as_ptr(); + let canUseInternal = buf.is_unique(); if canUseInternal { - // We inc the refence count because roc will think after we return the reference count should be zero. - buf_reader.internalList.inc(); unsafe { //This ensures we always expand the buffer to the full capacity of the list - let buf_slice: &mut [u8] = std::slice::from_raw_parts_mut( - buf_reader.internalList.as_mut_ptr(), - buf_reader.internalList.capacity(), - ); + let buf_slice: &mut [u8] = + std::slice::from_raw_parts_mut(buf.as_mut_ptr(), buf.capacity()); loop { - let read = match buf_reader.reader.read(buf_slice) { - // Ok(n) if n < buf_len => return RocResult::err("no more bytes".into()), + let read = match file.read(buf_slice) { Ok(n) => n, Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, Err(e) => return RocResult::err(e.to_string().as_str().into()), }; - let roc_list = RocList::from_raw_parts( - buf_reader.internalList.as_mut_ptr(), - read, - buf_reader.internalList.capacity(), - ); + let mut roc_list = RocList::from_raw_parts(buf.as_mut_ptr(), read, buf.capacity()); + roc_list.inc(); + return RocResult::ok(roc_list); } } } else { + // return RocResult::err("not unique".into()); unsafe { //Make a new list - let mut list = RocList::with_capacity(buf_reader.internalList.capacity()); - list.inc(); + let mut list = RocList::with_capacity(buf.capacity()); //get a slice to the full memmory of the list let slice: &mut [u8] = std::slice::from_raw_parts_mut(list.as_mut_ptr(), list.capacity()); - buf_reader.internalList = list; loop { - let read = match buf_reader.reader.read(slice) { + let read = match file.read(slice) { Ok(n) => n, Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, Err(e) => return RocResult::err(e.to_string().as_str().into()), }; //update the length based on amount read - let roc_list = RocList::from_raw_parts( - buf_reader.internalList.as_mut_ptr(), - read, - buf_reader.internalList.capacity(), - ); + let roc_list = RocList::from_raw_parts(list.as_mut_ptr(), read, list.capacity()); + std::mem::forget(list); return RocResult::ok(roc_list); } } } } -fn read_until( +/// Reads until the provided delim expanding the roc buffer as it goes. Returns a new reference to the same roc buffer but with a length exactly as long as the +fn read_until( r: &mut R, delim: u8, - buf: &mut RocList, -) -> io::Result { + mut buf: RocList, +) -> io::Result> { let mut read = 0; + let og_capacity = buf.capacity(); loop { let (done, used) = { - let available = match r.fill_buf() { + //get a slice between the end of the last read and the end of the buffer + let buf_slice: &mut [u8] = unsafe { + std::slice::from_raw_parts_mut(buf.as_mut_ptr().add(read), buf.capacity() - read) + }; + let this_read = match r.read(buf_slice) { Ok(n) => n, Err(ref e) if matches!(e.kind(), ErrorKind::Interrupted) => continue, Err(e) => return Err(e), }; - match memchr::memchr(delim, available) { - Some(i) => { - buf.extend_from_slice(&available[..=i]); - (true, i + 1) - } - None => { - buf.extend_from_slice(available); - (false, available.len()) + //if we read 0 bytes we are done because that's EOF + if this_read == 0 { + (true, 0) + } else { + let readSlice: &[u8] = &buf_slice[..this_read]; + match memchr::memchr(delim, readSlice) { + Some(i) => (true, i + 1), + None => (false, this_read), } } }; - r.consume(used); read += used; if done || used == 0 { - return Ok(read); + let out = unsafe { RocList::from_raw_parts(buf.as_mut_ptr(), read, buf.capacity()) }; + //Don't drop the buffer because we are returning it + std::mem::forget(buf); + return Ok(out); + } + + // Ensure we have enough capacity for the next read + if buf.capacity() < read + og_capacity { + buf.reserve(og_capacity); } } } @@ -1188,9 +1133,9 @@ pub extern "C" fn roc_fx_tcpReadUntil( let stream: &mut BufReader = ThreadSafeRefcountedResourceHeap::box_to_resource(stream); - let mut buffer = RocList::empty(); - match read_until(stream, byte, &mut buffer) { - Ok(_) => RocResult::ok(buffer), + let buffer = RocList::with_capacity(8000); + match read_until(stream, byte, buffer) { + Ok(buffer) => RocResult::ok(buffer), Err(err) => RocResult::err(to_tcp_stream_err(err)), } } diff --git a/platform/File.roc b/platform/File.roc index 2a149d21..e22f39b4 100644 --- a/platform/File.roc +++ b/platform/File.roc @@ -7,6 +7,7 @@ module [ readUtf8!, readBytes!, # read, TODO fix "Ability specialization is unknown - code generation cannot proceed!: DeriveError(UnboundVar)" + read!, delete!, isDir!, isFile!, @@ -17,9 +18,9 @@ module [ openReaderWithCapacity!, openReaderWithBuf!, readLine!, - readBytesBuf!, + readBytesToBuf!, ] - +# import Shared exposing [ByteReader] import Path exposing [Path, MetadataErr] import InternalFile import PlatformTasks @@ -195,62 +196,85 @@ type! : Str => Result [IsFile, IsDir, IsSymLink] [PathErr MetadataErr] type! = \path -> Path.type! (Path.fromStr path) -Reader := { reader : PlatformTasks.FileReader, path : Path } +Reader := { reader : PlatformTasks.FileReader, path : Path, buffer : List U8 } ## Try to open a `File.Reader` for buffered (= part by part) reading given a path string. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's std::io::BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html). -## ## Use [readUtf8!] if you want to get the entire file contents at once. openReader! : Str => Result Reader [GetFileReadErr Path ReadErr] openReader! = \pathStr -> path = Path.fromStr pathStr + buffer = List.withCapacity 8000 # 0 means with default capacity - PlatformTasks.fileReader! (Str.toUtf8 pathStr) 0 + PlatformTasks.fileReader! (Str.toUtf8 pathStr) |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) - |> Result.map \reader -> @Reader { reader, path } + |> Result.map \reader -> @Reader { reader, path, buffer } ## Try to open a `File.Reader` for buffered (= part by part) reading given a path string. ## The buffer will be created with the specified capacity. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's std::io::BufReader](https://doc.rust-lang.org/std/io/struct.BufReader.html). -## ## Use [readUtf8!] if you want to get the entire file contents at once. openReaderWithCapacity! : Str, U64 => Result Reader [GetFileReadErr Path ReadErr] openReaderWithCapacity! = \pathStr, capacity -> path = Path.fromStr pathStr + # 8k is the default in rust and seems reasonable + buffer = List.withCapacity (if capacity == 0 then 8000 else capacity) - PlatformTasks.fileReader! (Str.toUtf8 pathStr) capacity + PlatformTasks.fileReader! (Str.toUtf8 pathStr) |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) - |> Result.map \reader -> @Reader { reader, path } + |> Result.map \reader -> @Reader { reader, path, buffer } ## Try to read a line from a file given a Reader. ## The line will be provided as the list of bytes (`List U8`) until a newline (`0xA` byte). ## This list will be empty when we reached the end of the file. ## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. ## -## This uses [rust's `BufRead::read_line`](https://doc.rust-lang.org/std/io/trait.BufRead.html#method.read_line). -## ## Use [readUtf8!] if you want to get the entire file contents at once. readLine! : Reader => Result (List U8) [FileReadErr Path Str] -readLine! = \@Reader { reader, path } -> - PlatformTasks.fileReadLine! reader +readLine! = \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadLine! reader buffer |> Result.mapErr \err -> FileReadErr path err -readBytesBuf! : Reader,List U8 => Result (List U8) [FileReadErr Path Str] -readBytesBuf! = \@Reader { reader, path },buf -> - PlatformTasks.fileReadByteBuf! reader buf +## Try to read bytes from a file given a Reader. +## Returns a list of bytes (`List U8`) read from the file. +## The list will be empty when we reach the end of the file. +## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. +## +## NOTE: Avoid storing a reference to the buffer returned by this function beyond the next call to try readBytes. +## That will allow the buffer to be reused and avoid unnecessary allocations. +## +## Use [readUtf8!] if you want to get the entire file contents at once as a UTF-8 string. +read! : Reader => Result (List U8) [FileReadErr Path Str] +read! = \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadByteBuf! reader buffer |> Result.mapErr \err -> FileReadErr path err -# TODO! This returns a FIle but isn't actually the same as other readers so it would break if i use it in another reader +## Try to open a `File.Reader` using the provided buffer as the internal buffer. +## See [examples/file-read-buffered.roc](https://github.com/roc-lang/basic-cli/blob/main/examples/file-read-buffered.roc) for example usage. +## +## Use [readUtf8!] if you want to get the entire file contents at once. openReaderWithBuf! : Str, List U8 => Result Reader [GetFileReadErr Path ReadErr] -openReaderWithBuf! = \pathStr, capacity -> +openReaderWithBuf! = \pathStr, buffer -> path = Path.fromStr pathStr - PlatformTasks.fileReaderRocBuf! (Str.toUtf8 pathStr) capacity + PlatformTasks.fileReader! (Str.toUtf8 pathStr) |> Result.mapErr \err -> GetFileReadErr path (InternalFile.handleReadErr err) - |> Result.map \reader -> @Reader { reader, path } + |> Result.map \reader -> @Reader { reader, path, buffer } + + +## Try to read bytes from a file given a Reader. +## Returns a list of bytes (`List U8`) read from the file. +## The list will be empty when we reach the end of the file. +## This function is exists for very specific use cases where you want to use multiple buffers with a single reader +## +## Prefer [File.readBytes!] which will automatically reuse the reader's internalbuffer. +readBytesToBuf! : Reader => Result (List U8) [FileReadErr Path Str] +readBytesToBuf! = \@Reader { reader, path, buffer } -> + PlatformTasks.fileReadByteBuf! reader buffer + |> Result.mapErr \err -> FileReadErr path err + + diff --git a/platform/PlatformTasks.roc b/platform/PlatformTasks.roc index 7493b2a4..f6702448 100644 --- a/platform/PlatformTasks.roc +++ b/platform/PlatformTasks.roc @@ -29,7 +29,6 @@ hosted PlatformTasks fileWriteUtf8!, fileWriteBytes!, fileReader!, - fileReaderRocBuf!, fileReadLine!, fileReadByteBuf!, pathType!, @@ -85,10 +84,9 @@ fileDelete! : List U8 => Result {} Str fileReadBytes! : List U8 => Result (List U8) Str FileReader := Box {} -fileReader! : List U8, U64 => Result FileReader Str -fileReaderRocBuf! : List U8,List U8=> Result FileReader Str -fileReadLine! : FileReader => Result (List U8) Str -fileReadByteBuf! : FileReader, List U8 => Result (List U8) Str +fileReader! : List U8 => Result FileReader Str +fileReadLine! : FileReader,List U8 => Result (List U8) Str +fileReadByteBuf! : FileReader, List U8=> Result (List U8) Str envDict! : {} => List (Str, Str) envVar! : Str => Result Str {} From 556e8fcfb80e1d1c6e50eb8a3c2398dee3b1a9c6 Mon Sep 17 00:00:00 2001 From: faldor20 Date: Mon, 9 Dec 2024 02:11:03 +1000 Subject: [PATCH 6/6] fixed rust warnings unused imports --- crates/roc_host/src/lib.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/crates/roc_host/src/lib.rs b/crates/roc_host/src/lib.rs index fe35b519..8cbdbf83 100644 --- a/crates/roc_host/src/lib.rs +++ b/crates/roc_host/src/lib.rs @@ -7,15 +7,12 @@ #![allow(improper_ctypes)] use core::ffi::c_void; use core::panic; -use hyper::body::Buf; use roc_std::{RocBox, RocList, RocRefcounted, RocResult, RocStr}; use roc_std_heap::ThreadSafeRefcountedResourceHeap; use std::borrow::{Borrow, Cow}; use std::ffi::OsStr; -use std::fmt::Debug; use std::fs::File; -use std::io::{BufRead, BufReader, ErrorKind, IsTerminal, Read, Seek, Write}; -use std::mem::ManuallyDrop; +use std::io::{BufRead, BufReader, ErrorKind, Read, Write}; use std::net::TcpStream; use std::path::Path; use std::sync::OnceLock;