Commit 8541f94e authored by Jacek Caban's avatar Jacek Caban Committed by Alexandre Julliard

jscript: Store concatenated strings as a rope string to avoid useless copying.

parent 019a1af7
...@@ -16,13 +16,54 @@ ...@@ -16,13 +16,54 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/ */
#include <assert.h>
#include "jscript.h" #include "jscript.h"
#include "wine/debug.h" #include "wine/debug.h"
/*
* This is the length of a string that is considered to be long enough to be
* worth the rope to avoid copy.
* This probably could be tuned, but keep it low for a while to better test rope's code.
*/
#define JSSTR_SHORT_STRING_LENGTH 8
/*
* This is the max rope depth. While faster to allocate, ropes may become slow at access.
*/
#define JSSTR_MAX_ROPE_DEPTH 100
const char *debugstr_jsstr(jsstr_t *str) const char *debugstr_jsstr(jsstr_t *str)
{ {
return debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str)); return jsstr_is_inline(str) ? debugstr_wn(jsstr_as_inline(str)->buf, jsstr_length(str))
: jsstr_is_heap(str) ? debugstr_wn(jsstr_as_heap(str)->buf, jsstr_length(str))
: wine_dbg_sprintf("%s...", debugstr_jsstr(jsstr_as_rope(str)->left));
}
void jsstr_free(jsstr_t *str)
{
switch(jsstr_tag(str)) {
case JSSTR_HEAP:
heap_free(jsstr_as_heap(str)->buf);
break;
case JSSTR_ROPE: {
jsstr_rope_t *rope = jsstr_as_rope(str);
jsstr_release(rope->left);
jsstr_release(rope->right);
break;
}
case JSSTR_INLINE:
break;
}
heap_free(str);
}
static inline void jsstr_init(jsstr_t *str, unsigned len, jsstr_tag_t tag)
{
str->length_flags = len << JSSTR_LENGTH_SHIFT | tag;
str->ref = 1;
} }
WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r) WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
...@@ -36,8 +77,7 @@ WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r) ...@@ -36,8 +77,7 @@ WCHAR *jsstr_alloc_buf(unsigned len, jsstr_t **r)
if(!ret) if(!ret)
return NULL; return NULL;
ret->str.length_flags = len << JSSTR_LENGTH_SHIFT; jsstr_init(&ret->str, len, JSSTR_INLINE);
ret->str.ref = 1;
ret->buf[len] = 0; ret->buf[len] = 0;
*r = &ret->str; *r = &ret->str;
return ret->buf; return ret->buf;
...@@ -55,17 +95,116 @@ jsstr_t *jsstr_alloc_len(const WCHAR *buf, unsigned len) ...@@ -55,17 +95,116 @@ jsstr_t *jsstr_alloc_len(const WCHAR *buf, unsigned len)
return ret; return ret;
} }
static void jsstr_rope_extract(jsstr_rope_t *str, unsigned off, unsigned len, WCHAR *buf)
{
unsigned left_len = jsstr_length(str->left);
if(left_len <= off) {
jsstr_extract(str->right, off-left_len, len, buf);
}else if(left_len >= len+off) {
jsstr_extract(str->left, off, len, buf);
}else {
left_len -= off;
jsstr_extract(str->left, off, left_len, buf);
jsstr_extract(str->right, 0, len-left_len, buf+left_len);
}
}
void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf)
{
switch(jsstr_tag(str)) {
case JSSTR_INLINE:
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR));
return;
case JSSTR_HEAP:
memcpy(buf, jsstr_as_heap(str)->buf+off, len*sizeof(WCHAR));
return;
case JSSTR_ROPE:
return jsstr_rope_extract(jsstr_as_rope(str), off, len, buf);
}
}
static int jsstr_cmp_str(jsstr_t *jsstr, const WCHAR *str, unsigned len)
{
int ret;
switch(jsstr_tag(jsstr)) {
case JSSTR_INLINE:
ret = memcmp(jsstr_as_inline(jsstr)->buf, str, len*sizeof(WCHAR));
return ret || jsstr_length(jsstr) == len ? ret : 1;
case JSSTR_HEAP:
ret = memcmp(jsstr_as_heap(jsstr)->buf, str, len*sizeof(WCHAR));
return ret || jsstr_length(jsstr) == len ? ret : 1;
case JSSTR_ROPE: {
jsstr_rope_t *rope = jsstr_as_rope(jsstr);
unsigned left_len = jsstr_length(rope->left);
ret = jsstr_cmp_str(rope->left, str, min(len, left_len));
if(ret || len <= left_len)
return ret;
return jsstr_cmp_str(rope->right, str+left_len, len-left_len);
}
}
assert(0);
return 0;
}
#define TMP_BUF_SIZE 256
static int ropes_cmp(jsstr_rope_t *left, jsstr_rope_t *right)
{
WCHAR left_buf[TMP_BUF_SIZE], right_buf[TMP_BUF_SIZE];
unsigned left_len = jsstr_length(&left->str);
unsigned right_len = jsstr_length(&right->str);
unsigned cmp_off = 0, cmp_size;
int ret;
/* FIXME: We can avoid temporary buffers here. */
while(cmp_off < min(left_len, right_len)) {
cmp_size = min(left_len, right_len) - cmp_off;
if(cmp_size > TMP_BUF_SIZE)
cmp_size = TMP_BUF_SIZE;
jsstr_rope_extract(left, cmp_off, cmp_size, left_buf);
jsstr_rope_extract(right, cmp_off, cmp_size, right_buf);
ret = memcmp(left_buf, right_buf, cmp_size);
if(ret)
return ret;
cmp_off += cmp_size;
}
return left_len - right_len;
}
static inline const WCHAR *jsstr_try_flat(jsstr_t *str)
{
return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
: NULL;
}
int jsstr_cmp(jsstr_t *str1, jsstr_t *str2) int jsstr_cmp(jsstr_t *str1, jsstr_t *str2)
{ {
int len1 = jsstr_length(str1); unsigned len1 = jsstr_length(str1);
int len2 = jsstr_length(str2); unsigned len2 = jsstr_length(str2);
const WCHAR *str;
int ret; int ret;
ret = memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, min(len1, len2)*sizeof(WCHAR)); str = jsstr_try_flat(str2);
if(!ret) if(str) {
ret = len1 - len2; ret = jsstr_cmp_str(str1, str, min(len1, len2));
return ret || len1 == len2 ? ret : -1;
}
return ret; str = jsstr_try_flat(str1);
if(str) {
ret = jsstr_cmp_str(str2, str, min(len1, len2));
return ret || len1 == len2 ? -ret : 1;
}
return ropes_cmp(jsstr_as_rope(str1), jsstr_as_rope(str2));
} }
jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
...@@ -82,6 +221,31 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) ...@@ -82,6 +221,31 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
if(!len2) if(!len2)
return jsstr_addref(str1); return jsstr_addref(str1);
if(len1 + len2 >= JSSTR_SHORT_STRING_LENGTH) {
unsigned depth, depth2;
jsstr_rope_t *rope;
depth = jsstr_is_rope(str1) ? jsstr_as_rope(str1)->depth : 0;
depth2 = jsstr_is_rope(str2) ? jsstr_as_rope(str2)->depth : 0;
if(depth2 > depth)
depth = depth2;
if(depth++ < JSSTR_MAX_ROPE_DEPTH) {
if(len1+len2 > JSSTR_MAX_LENGTH)
return NULL;
rope = heap_alloc(sizeof(*rope));
if(!rope)
return NULL;
jsstr_init(&rope->str, len1+len2, JSSTR_ROPE);
rope->left = jsstr_addref(str1);
rope->right = jsstr_addref(str2);
rope->depth = depth;
return &rope->str;
}
}
ptr = jsstr_alloc_buf(len1+len2, &ret); ptr = jsstr_alloc_buf(len1+len2, &ret);
if(!ret) if(!ret)
return NULL; return NULL;
...@@ -89,6 +253,28 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2) ...@@ -89,6 +253,28 @@ jsstr_t *jsstr_concat(jsstr_t *str1, jsstr_t *str2)
jsstr_flush(str1, ptr); jsstr_flush(str1, ptr);
jsstr_flush(str2, ptr+len1); jsstr_flush(str2, ptr+len1);
return ret; return ret;
}
C_ASSERT(sizeof(jsstr_heap_t) <= sizeof(jsstr_rope_t));
const WCHAR *jsstr_rope_flatten(jsstr_rope_t *str)
{
WCHAR *buf;
buf = heap_alloc((jsstr_length(&str->str)+1) * sizeof(WCHAR));
if(!buf)
return NULL;
jsstr_flush(str->left, buf);
jsstr_flush(str->right, buf+jsstr_length(str->left));
buf[jsstr_length(&str->str)] = 0;
/* Trasform to heap string */
jsstr_release(str->left);
jsstr_release(str->right);
str->str.length_flags |= JSSTR_FLAG_FLAT;
return jsstr_as_heap(&str->str)->buf = buf;
} }
static jsstr_t *empty_str, *nan_str, *undefined_str; static jsstr_t *empty_str, *nan_str, *undefined_str;
......
...@@ -16,6 +16,23 @@ ...@@ -16,6 +16,23 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/ */
/*
* This is a common header for all string representations. The exact layout of the string
* representation may be:
*
* - inline string - string bytes directly follow string headers.
* - heap string - a structure containing a pointer to buffer on the heap.
* - roper string - a product of concatenation of two strings. Instead of copying whole
* buffers, we may store just references to concatenated strings.
*
* String layout may change over life time of the string. Currently possible transformation
* is when a rope string becomes a heap stream. That happens when we need a real, linear
* zero-terminated buffer (a flat buffer). At this point the type of the string is changed
* and the new buffer is stored in the string, so that subsequent operations requiring
* a flat string won't need to flatten it again.
*
* In the future more layouts and transformations may be added.
*/
struct _jsstr_t { struct _jsstr_t {
unsigned length_flags; unsigned length_flags;
unsigned ref; unsigned ref;
...@@ -25,18 +42,60 @@ struct _jsstr_t { ...@@ -25,18 +42,60 @@ struct _jsstr_t {
#define JSSTR_MAX_LENGTH (1 << (32-JSSTR_LENGTH_SHIFT)) #define JSSTR_MAX_LENGTH (1 << (32-JSSTR_LENGTH_SHIFT))
#define JSSTR_FLAGS_MASK ((1 << JSSTR_LENGTH_SHIFT)-1) #define JSSTR_FLAGS_MASK ((1 << JSSTR_LENGTH_SHIFT)-1)
#define JSSTR_FLAG_NULLBSTR 1 #define JSSTR_FLAG_NULLBSTR 4
#define JSSTR_FLAG_LBIT 1
#define JSSTR_FLAG_FLAT 2
#define JSSTR_FLAG_TAG_MASK 3
typedef enum {
JSSTR_INLINE = JSSTR_FLAG_FLAT,
JSSTR_HEAP = JSSTR_FLAG_FLAT|JSSTR_FLAG_LBIT,
JSSTR_ROPE = JSSTR_FLAG_LBIT
} jsstr_tag_t;
static inline unsigned jsstr_length(jsstr_t *str) static inline unsigned jsstr_length(jsstr_t *str)
{ {
return str->length_flags >> JSSTR_LENGTH_SHIFT; return str->length_flags >> JSSTR_LENGTH_SHIFT;
} }
static inline jsstr_tag_t jsstr_tag(jsstr_t *str)
{
return str->length_flags & JSSTR_FLAG_TAG_MASK;
}
static inline BOOL jsstr_is_inline(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_INLINE;
}
static inline BOOL jsstr_is_heap(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_HEAP;
}
static inline BOOL jsstr_is_rope(jsstr_t *str)
{
return jsstr_tag(str) == JSSTR_ROPE;
}
typedef struct { typedef struct {
jsstr_t str; jsstr_t str;
WCHAR buf[1]; WCHAR buf[1];
} jsstr_inline_t; } jsstr_inline_t;
typedef struct {
jsstr_t str;
WCHAR *buf;
} jsstr_heap_t;
typedef struct {
jsstr_t str;
jsstr_t *left;
jsstr_t *right;
unsigned depth;
} jsstr_rope_t;
jsstr_t *jsstr_alloc_len(const WCHAR*,unsigned) DECLSPEC_HIDDEN; jsstr_t *jsstr_alloc_len(const WCHAR*,unsigned) DECLSPEC_HIDDEN;
WCHAR *jsstr_alloc_buf(unsigned,jsstr_t**) DECLSPEC_HIDDEN; WCHAR *jsstr_alloc_buf(unsigned,jsstr_t**) DECLSPEC_HIDDEN;
...@@ -45,10 +104,16 @@ static inline jsstr_t *jsstr_alloc(const WCHAR *str) ...@@ -45,10 +104,16 @@ static inline jsstr_t *jsstr_alloc(const WCHAR *str)
return jsstr_alloc_len(str, strlenW(str)); return jsstr_alloc_len(str, strlenW(str));
} }
void jsstr_free(jsstr_t*) DECLSPEC_HIDDEN;
static inline void jsstr_release(jsstr_t *str) static inline void jsstr_release(jsstr_t *str)
{ {
if(!--str->ref) if(!--str->ref) {
heap_free(str); if(jsstr_is_inline(str))
heap_free(str);
else
jsstr_free(str);
}
} }
static inline jsstr_t *jsstr_addref(jsstr_t *str) static inline jsstr_t *jsstr_addref(jsstr_t *str)
...@@ -62,27 +127,39 @@ static inline jsstr_inline_t *jsstr_as_inline(jsstr_t *str) ...@@ -62,27 +127,39 @@ static inline jsstr_inline_t *jsstr_as_inline(jsstr_t *str)
return CONTAINING_RECORD(str, jsstr_inline_t, str); return CONTAINING_RECORD(str, jsstr_inline_t, str);
} }
/* This will be failable in the future. */ static inline jsstr_heap_t *jsstr_as_heap(jsstr_t *str)
static inline const WCHAR *jsstr_flatten(jsstr_t *str)
{ {
return jsstr_as_inline(str)->buf; return CONTAINING_RECORD(str, jsstr_heap_t, str);
} }
static inline BOOL jsstr_eq(jsstr_t *str1, jsstr_t *str2) static inline jsstr_rope_t *jsstr_as_rope(jsstr_t *str)
{ {
unsigned len = jsstr_length(str1); return CONTAINING_RECORD(str, jsstr_rope_t, str);
return len == jsstr_length(str2) && !memcmp(jsstr_as_inline(str1)->buf, jsstr_as_inline(str2)->buf, len*sizeof(WCHAR));
} }
static inline void jsstr_extract(jsstr_t *str, unsigned off, unsigned len, WCHAR *buf) const WCHAR *jsstr_rope_flatten(jsstr_rope_t*) DECLSPEC_HIDDEN;
static inline const WCHAR *jsstr_flatten(jsstr_t *str)
{ {
memcpy(buf, jsstr_as_inline(str)->buf+off, len*sizeof(WCHAR)); return jsstr_is_inline(str) ? jsstr_as_inline(str)->buf
: jsstr_is_heap(str) ? jsstr_as_heap(str)->buf
: jsstr_rope_flatten(jsstr_as_rope(str));
} }
void jsstr_extract(jsstr_t*,unsigned,unsigned,WCHAR*) DECLSPEC_HIDDEN;
static inline unsigned jsstr_flush(jsstr_t *str, WCHAR *buf) static inline unsigned jsstr_flush(jsstr_t *str, WCHAR *buf)
{ {
unsigned len = jsstr_length(str); unsigned len = jsstr_length(str);
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR)); if(jsstr_is_inline(str)) {
memcpy(buf, jsstr_as_inline(str)->buf, len*sizeof(WCHAR));
}else if(jsstr_is_heap(str)) {
memcpy(buf, jsstr_as_heap(str)->buf, len*sizeof(WCHAR));
}else {
jsstr_rope_t *rope = jsstr_as_rope(str);
jsstr_flush(rope->left, buf);
jsstr_flush(rope->right, buf+jsstr_length(rope->left));
}
return len; return len;
} }
...@@ -98,6 +175,12 @@ static inline jsstr_t *jsstr_substr(jsstr_t *str, unsigned off, unsigned len) ...@@ -98,6 +175,12 @@ static inline jsstr_t *jsstr_substr(jsstr_t *str, unsigned off, unsigned len)
} }
int jsstr_cmp(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN; int jsstr_cmp(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
static inline BOOL jsstr_eq(jsstr_t *left, jsstr_t *right)
{
return jsstr_length(left) == jsstr_length(right) && !jsstr_cmp(left, right);
}
jsstr_t *jsstr_concat(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN; jsstr_t *jsstr_concat(jsstr_t*,jsstr_t*) DECLSPEC_HIDDEN;
jsstr_t *jsstr_nan(void) DECLSPEC_HIDDEN; jsstr_t *jsstr_nan(void) DECLSPEC_HIDDEN;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment