diff --git a/Cargo.lock b/Cargo.lock index 8f6ccf6a8..daa8da209 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,4 +1,66 @@ [root] name = "ethcore-util" version = "0.1.0" +dependencies = [ + "env_logger 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "env_logger" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "log 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "libc" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "log" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "memchr" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" diff --git 
// Reconstructed from a whitespace-collapsed patch. The three source files the
// patch touches are laid out as inline modules so that this listing is a
// single, self-contained crate root:
//
//   * src/bytes.rs (new file) -> `pub mod bytes { .. }`
//   * src/lib.rs              -> the two `pub mod` items and `it_works`
//   * src/rlp.rs   (new file) -> `pub mod rlp { .. }`

// ===== src/bytes.rs =====
pub mod bytes {
    //! To/from bytes conversion for basic types.
    //!
    //! Types implementing the `ToBytes` and `FromBytes` traits can be easily
    //! converted to and from bytes.
    //!
    //! # Examples
    //!
    //! ```ignore
    //! assert_eq!(1024u64.to_bytes(), vec![0x04, 0x00]);
    //! assert_eq!(u64::from_bytes(&[0x04, 0x00]), Ok(1024));
    //! ```

    use std::error::Error as StdError;
    use std::fmt;

    /// Conversion of a value into a byte vector.
    ///
    /// TODO: optimise some conversions
    pub trait ToBytes {
        /// Returns the encoded bytes of `self`.
        fn to_bytes(&self) -> Vec<u8>;

        /// Returns the number of bytes `to_bytes` produces.
        ///
        /// The default builds the whole vector just to measure it; the
        /// implementations below override it with a direct computation.
        fn to_bytes_len(&self) -> usize {
            self.to_bytes().len()
        }

        /// Returns the first encoded byte, or `None` if the encoding is empty.
        fn first_byte(&self) -> Option<u8> {
            self.to_bytes().first().copied()
        }
    }

    impl<'a> ToBytes for &'a str {
        /// The UTF-8 bytes of the string slice.
        fn to_bytes(&self) -> Vec<u8> {
            self.as_bytes().to_vec()
        }

        fn to_bytes_len(&self) -> usize {
            self.len()
        }
    }

    impl ToBytes for String {
        /// The UTF-8 bytes of the string.
        fn to_bytes(&self) -> Vec<u8> {
            self.as_bytes().to_vec()
        }

        fn to_bytes_len(&self) -> usize {
            self.len()
        }
    }

    impl ToBytes for u8 {
        /// Zero encodes to an empty vector; any other value to that one byte.
        fn to_bytes(&self) -> Vec<u8> {
            match *self {
                0 => vec![],
                byte => vec![byte],
            }
        }

        fn to_bytes_len(&self) -> usize {
            match *self {
                0 => 0,
                _ => 1,
            }
        }

        fn first_byte(&self) -> Option<u8> {
            match *self {
                0 => None,
                byte => Some(byte),
            }
        }
    }

    impl ToBytes for u64 {
        /// Most-significant byte first, with leading zero bytes omitted
        /// (so `0` encodes to an empty vector).
        fn to_bytes(&self) -> Vec<u8> {
            let count = self.to_bytes_len();
            (0..count)
                .rev()
                .map(|j| (*self >> (j * 8)) as u8)
                .collect()
        }

        /// Eight minus the number of whole leading zero bytes.
        fn to_bytes_len(&self) -> usize {
            8 - self.leading_zeros() as usize / 8
        }
    }

    /// Implements `ToBytes` for `$from` by widening the value to `$to` and
    /// delegating to that type's implementation.
    macro_rules! impl_map_to_bytes {
        ($from: ident, $to: ty) => {
            impl ToBytes for $from {
                fn to_bytes(&self) -> Vec<u8> {
                    (*self as $to).to_bytes()
                }

                fn to_bytes_len(&self) -> usize {
                    (*self as $to).to_bytes_len()
                }
            }
        };
    }

    impl_map_to_bytes!(usize, u64);
    impl_map_to_bytes!(u16, u64);
    impl_map_to_bytes!(u32, u64);

    /// Error returned by `FromBytes::from_bytes`.
    #[derive(Debug, PartialEq, Eq)]
    pub enum FromBytesError {
        UnexpectedEnd,
    }

    impl StdError for FromBytesError {
        fn description(&self) -> &str {
            "from_bytes error"
        }
    }

    impl fmt::Display for FromBytesError {
        /// Displays the error using its `Debug` representation.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            fmt::Debug::fmt(self, f)
        }
    }

    /// Result alias used by every `FromBytes` implementation.
    pub type FromBytesResult<T> = Result<T, FromBytesError>;

    /// Conversion of a byte slice back into a value.
    ///
    /// Requires `Sized` so the compiler can deduce the size of the return
    /// type.
    pub trait FromBytes: Sized {
        /// Decodes `bytes` into `Self`.
        fn from_bytes(bytes: &[u8]) -> FromBytesResult<Self>;
    }

    impl FromBytes for String {
        /// Interprets `bytes` as UTF-8 text.
        ///
        /// # Panics
        ///
        /// Panics when `bytes` is not valid UTF-8.
        /// NOTE(review): `FromBytesError` has no variant that describes this
        /// failure; returning an error here needs a new variant, which would
        /// break exhaustive matches on the enum — confirm before changing.
        fn from_bytes(bytes: &[u8]) -> FromBytesResult<String> {
            let text = ::std::str::from_utf8(bytes)
                .expect("FromBytes for String: input is not valid UTF-8");
            Ok(text.to_string())
        }
    }

    impl FromBytes for u8 {
        /// An empty slice decodes to `0`; otherwise the first byte is
        /// returned and any remaining bytes are ignored.
        fn from_bytes(bytes: &[u8]) -> FromBytesResult<u8> {
            Ok(bytes.first().copied().unwrap_or(0))
        }
    }

    impl FromBytes for u64 {
        /// Reads `bytes` as a most-significant-byte-first integer; an empty
        /// slice decodes to `0`.
        ///
        /// Shifting the accumulator (instead of shifting each byte by its
        /// final position) keeps the shift amount constant, so inputs longer
        /// than eight bytes can no longer trigger a shift-overflow panic:
        /// they keep only their low 64 bits, the same truncation the
        /// narrower integer types get through their `as` cast.
        fn from_bytes(bytes: &[u8]) -> FromBytesResult<u64> {
            Ok(bytes
                .iter()
                .fold(0u64, |acc, &byte| (acc << 8) | u64::from(byte)))
        }
    }

    /// Implements `FromBytes` for `$from` by decoding a `$to` and casting the
    /// result down with `as` (which truncates silently).
    macro_rules! impl_map_from_bytes {
        ($from: ident, $to: ident) => {
            impl FromBytes for $from {
                fn from_bytes(bytes: &[u8]) -> FromBytesResult<$from> {
                    $to::from_bytes(bytes).map(|x| x as $from)
                }
            }
        };
    }

    impl_map_from_bytes!(usize, u64);
    impl_map_from_bytes!(u16, u64);
    impl_map_from_bytes!(u32, u64);
}

// ===== src/lib.rs =====
// The patch adds `pub mod bytes;` and `pub mod rlp;` above this existing test;
// both modules are declared inline in this listing.
#[test]
fn it_works() {}

// ===== src/rlp.rs =====
pub mod rlp {
    //! Rlp serialization module

    use std::cell::Cell;
    use std::error::Error as StdError;
    use std::fmt;

    use super::bytes::{FromBytes, FromBytesError};

    /// Cache index meaning "nothing has been looked up yet".
    const NO_CACHED_INDEX: usize = usize::MAX;

    /// Rlp container: a borrowed byte slice plus a one-entry position cache
    /// used by `at`.
    #[derive(Debug)]
    pub struct Rlp<'a> {
        bytes: &'a [u8],
        cache: Cell<OffsetCache>,
    }

    /// Remembers the byte offset (from the start of the container) at which
    /// the item with the given index begins.
    #[derive(Copy, Clone, Debug)]
    struct OffsetCache {
        index: usize,
        offset: usize,
    }

    impl OffsetCache {
        fn new(index: usize, offset: usize) -> OffsetCache {
            OffsetCache { index, offset }
        }
    }

    /// Stores basic information about an item: how many bytes its prefix
    /// occupies and how many bytes of value follow.
    struct ItemInfo {
        prefix_len: usize,
        value_len: usize,
    }

    impl ItemInfo {
        fn new(prefix_len: usize, value_len: usize) -> ItemInfo {
            ItemInfo { prefix_len, value_len }
        }
    }

    /// Errors produced while decoding.
    #[derive(Debug, PartialEq, Eq)]
    pub enum DecoderError {
        FromBytesError(FromBytesError),
        RlpIsTooShort,
        RlpIsNotArray,
        BadRlp,
    }

    impl StdError for DecoderError {
        // NOTE(review): the text says "builder" although this is the decoder
        // error type — looks like a copy/paste leftover; confirm before
        // changing a runtime string.
        fn description(&self) -> &str {
            "builder error"
        }
    }

    impl fmt::Display for DecoderError {
        /// Displays the error using its `Debug` representation.
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            fmt::Debug::fmt(self, f)
        }
    }

    impl From<FromBytesError> for DecoderError {
        fn from(err: FromBytesError) -> DecoderError {
            DecoderError::FromBytesError(err)
        }
    }

    impl<'a> Rlp<'a> {
        /// Returns a new instance of `Rlp` over `bytes` with an empty cache.
        pub fn new(bytes: &'a [u8]) -> Rlp<'a> {
            Rlp {
                bytes,
                cache: Cell::new(OffsetCache::new(NO_CACHED_INDEX, 0)),
            }
        }

        /// Gets the container subset at the given index.
        ///
        /// The parent container caches the searched position, so asking for
        /// the same or a later index afterwards resumes from there instead of
        /// rescanning from the start.
        ///
        /// # Errors
        ///
        /// * `RlpIsNotArray` when `self` is not an array.
        /// * `RlpIsTooShort` when the data ends before the requested item
        ///   does (including an `index` past the last item).
        /// * `FromBytesError` when a length field cannot be decoded.
        pub fn at(&self, index: usize) -> Result<Rlp<'a>, DecoderError> {
            if !self.is_array() {
                return Err(DecoderError::RlpIsNotArray);
            }

            // Move to the cached position if its index is less than or equal
            // to the searched index, otherwise start at the beginning of the
            // array. The sentinel is tested explicitly so that a fresh
            // container asked for `usize::MAX` does not mistake the empty
            // cache for a hit.
            let cached = self.cache.get();
            let cache_hit = cached.index != NO_CACHED_INDEX && cached.index <= index;
            let (start, to_skip) = if cache_hit {
                (Rlp::consume(self.bytes, cached.offset)?, index - cached.index)
            } else {
                (self.consume_array_prefix()?, index)
            };

            // Skip the items in front of the requested one.
            let bytes = Rlp::consume_items(start, to_skip)?;

            // Update the cache with the offset of the requested item.
            self.cache
                .set(OffsetCache::new(index, self.bytes.len() - bytes.len()));

            // Construct the new rlp over exactly that item (prefix + value).
            let found = Rlp::item_info(bytes)?;
            Ok(Rlp::new(&bytes[0..found.prefix_len + found.value_len]))
        }

        /// Returns true if the rlp is an array (first byte `>= 0xc0`).
        pub fn is_array(&self) -> bool {
            self.bytes.first().map_or(false, |&b| b >= 0xc0)
        }

        /// Returns true if the rlp is a value (first byte `<= 0xbf`).
        pub fn is_value(&self) -> bool {
            self.bytes.first().map_or(false, |&b| b <= 0xbf)
        }

        /// Returns an iterator over the items of this rlp.
        pub fn iter(&'a self) -> RlpIterator<'a> {
            self.into_iter()
        }

        /// Strips the prefix of the first item and returns what follows it.
        fn consume_array_prefix(&self) -> Result<&'a [u8], DecoderError> {
            let item = Rlp::item_info(self.bytes)?;
            Rlp::consume(self.bytes, item.prefix_len)
        }

        /// Skips `items` complete items and returns the remaining bytes.
        fn consume_items(bytes: &'a [u8], items: usize) -> Result<&'a [u8], DecoderError> {
            let mut rest = bytes;
            for _ in 0..items {
                let info = Rlp::item_info(rest)?;
                rest = Rlp::consume(rest, info.prefix_len + info.value_len)?;
            }
            Ok(rest)
        }

        /// Returns prefix/value lengths of the first item in `bytes`.
        ///
        /// The first byte selects the layout:
        ///
        /// * `0x00..=0x7f` — the byte is its own value, no prefix;
        /// * `0x80..=0xb7` / `0xc0..=0xf7` — one prefix byte carrying the
        ///   value length;
        /// * `0xb8..=0xbf` / `0xf8..=0xff` — one prefix byte carrying the
        ///   size of a length field that follows it.
        ///
        /// Returns `RlpIsTooShort` when `bytes` is empty or shorter than the
        /// item it announces.
        fn item_info(bytes: &[u8]) -> Result<ItemInfo, DecoderError> {
            let first = match bytes.first() {
                Some(&byte) => byte,
                None => return Err(DecoderError::RlpIsTooShort),
            };

            let item = match first {
                0..=0x7f => ItemInfo::new(0, 1),
                l @ 0x80..=0xb7 => ItemInfo::new(1, l as usize - 0x80),
                l @ 0xb8..=0xbf => Self::long_item_info(bytes, l as usize - 0xb7)?,
                l @ 0xc0..=0xf7 => ItemInfo::new(1, l as usize - 0xc0),
                l @ 0xf8..=0xff => Self::long_item_info(bytes, l as usize - 0xf7)?,
            };

            // `checked_add`: the length field comes from the input and may be
            // large enough to overflow the sum.
            match item.prefix_len.checked_add(item.value_len) {
                Some(total) if total <= bytes.len() => Ok(item),
                _ => Err(DecoderError::RlpIsTooShort),
            }
        }

        /// Decodes an item whose value length is stored in the `len_of_len`
        /// bytes that follow the first byte.
        ///
        /// Returns `RlpIsTooShort` instead of panicking when the length field
        /// itself is cut off.
        fn long_item_info(bytes: &[u8], len_of_len: usize) -> Result<ItemInfo, DecoderError> {
            let prefix_len = 1 + len_of_len;
            let len_bytes = match bytes.get(1..prefix_len) {
                Some(slice) => slice,
                None => return Err(DecoderError::RlpIsTooShort),
            };
            let value_len = usize::from_bytes(len_bytes)?;
            Ok(ItemInfo::new(prefix_len, value_len))
        }

        /// Drops the first `len` bytes of `bytes`; `RlpIsTooShort` when there
        /// are fewer than `len`.
        fn consume(bytes: &'a [u8], len: usize) -> Result<&'a [u8], DecoderError> {
            bytes.get(len..).ok_or(DecoderError::RlpIsTooShort)
        }
    }

    /// Non-consuming rlp iterator.
    pub struct RlpIterator<'a> {
        rlp: &'a Rlp<'a>,
        index: usize,
    }

    impl<'a> IntoIterator for &'a Rlp<'a> {
        type Item = Rlp<'a>;
        type IntoIter = RlpIterator<'a>;

        fn into_iter(self) -> Self::IntoIter {
            RlpIterator { rlp: self, index: 0 }
        }
    }

    impl<'a> Iterator for RlpIterator<'a> {
        type Item = Rlp<'a>;

        /// Yields `at(index)` for successive indices; any decoding error ends
        /// the iteration (it is turned into `None`).
        fn next(&mut self) -> Option<Rlp<'a>> {
            let index = self.index;
            let result = self.rlp.at(index).ok();
            self.index += 1;
            result
        }
    }

    #[cfg(test)]
    mod tests {
        use super::{DecoderError, Rlp};

        #[test]
        fn rlp_at() {
            let data = vec![0xc8, 0x83, b'c', b'a', b't', 0x83, b'd', b'o', b'g'];
            {
                let rlp = Rlp::new(&data);
                assert!(rlp.is_array());

                let cat = rlp.at(0).unwrap();
                assert!(cat.is_value());
                assert_eq!(cat.bytes, &[0x83u8, b'c', b'a', b't'][..]);

                let dog = rlp.at(1).unwrap();
                assert!(dog.is_value());
                assert_eq!(dog.bytes, &[0x83u8, b'd', b'o', b'g'][..]);

                let cat_again = rlp.at(0).unwrap();
                assert!(cat_again.is_value());
                assert_eq!(cat_again.bytes, &[0x83u8, b'c', b'a', b't'][..]);
            }
        }

        #[test]
        fn rlp_at_err() {
            let data = vec![0xc8, 0x83, b'c', b'a', b't', 0x83, b'd', b'o'];
            {
                let rlp = Rlp::new(&data);
                assert!(rlp.is_array());

                let cat_err = rlp.at(0).unwrap_err();
                assert_eq!(cat_err, DecoderError::RlpIsTooShort);

                let dog_err = rlp.at(1).unwrap_err();
                assert_eq!(dog_err, DecoderError::RlpIsTooShort);
            }
        }

        #[test]
        fn rlp_iter() {
            let data = vec![0xc8, 0x83, b'c', b'a', b't', 0x83, b'd', b'o', b'g'];
            {
                let rlp = Rlp::new(&data);
                let mut iter = rlp.iter();

                let cat = iter.next().unwrap();
                assert!(cat.is_value());
                assert_eq!(cat.bytes, &[0x83u8, b'c', b'a', b't'][..]);

                let dog = iter.next().unwrap();
                assert!(dog.is_value());
                assert_eq!(dog.bytes, &[0x83u8, b'd', b'o', b'g'][..]);

                let cat_again = rlp.at(0).unwrap();
                assert!(cat_again.is_value());
                assert_eq!(cat_again.bytes, &[0x83u8, b'c', b'a', b't'][..]);
            }
        }
    }
}