diff --git a/configure.in b/configure.in index a6165281e34f2b6b01d0458f9b1bddfabacf0205..2054fb2b730b53f8c777c47637bf9e678ae8c8c8 100644 --- a/configure.in +++ b/configure.in @@ -74,7 +74,7 @@ GNOMEUI_VERSION=2.2.0 GCONF_VERSION=1.2.1 STARTUP_NOTIFICATION_VERSION=0.8 DBUS_VERSION=0.60 -SQLITE_VERSION=3.8.10.1 +SQLITE_VERSION=3.8.11.1 MSMANIFEST_TOOL= diff --git a/db/sqlite3/src/sqlite3.c b/db/sqlite3/src/sqlite3.c index 6708aa2d8221045e2c80cd939fd57675fc174afe..1344938873b4ec0840afde73908e47a18bfc7a58 100644 --- a/db/sqlite3/src/sqlite3.c +++ b/db/sqlite3/src/sqlite3.c @@ -1,6 +1,6 @@ /****************************************************************************** ** This file is an amalgamation of many separate C source files from SQLite -** version 3.8.10.1. By combining all the individual C code files into this +** version 3.8.11.1. By combining all the individual C code files into this ** single large file, the entire code can be compiled as a single translation ** unit. This allows many compilers to do optimizations that would not be ** possible if the files were compiled separately. Performance improvements @@ -158,6 +158,13 @@ # define _LARGEFILE_SOURCE 1 #endif +/* What version of GCC is being used. 0 means GCC is not being used */ +#ifdef __GNUC__ +# define GCC_VERSION (__GNUC__*1000000+__GNUC_MINOR__*1000+__GNUC_PATCHLEVEL__) +#else +# define GCC_VERSION 0 +#endif + /* Needed for various definitions... */ #if defined(__GNUC__) && !defined(_GNU_SOURCE) # define _GNU_SOURCE @@ -230,7 +237,7 @@ ** ** The official C-language API documentation for SQLite is derived ** from comments in this file. This file is the authoritative source -** on how SQLite interfaces are suppose to operate. +** on how SQLite interfaces are supposed to operate. ** ** The name of this file under configuration management is "sqlite.h.in". ** The makefile makes some minor changes to this file (such as inserting @@ -318,9 +325,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.8.10.1" -#define SQLITE_VERSION_NUMBER 3008010 -#define SQLITE_SOURCE_ID "2015-05-09 12:14:55 05b4b1f2a937c06c90db70c09890038f6c98ec40" +#define SQLITE_VERSION "3.8.11.1" +#define SQLITE_VERSION_NUMBER 3008011 +#define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -1170,6 +1177,14 @@ struct sqlite3_io_methods { ** circumstances in order to fix a problem with priority inversion. ** Applications should <em>not</em> use this file-control. ** +** <li>[[SQLITE_FCNTL_ZIPVFS]] +** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other +** VFS should return SQLITE_NOTFOUND for this opcode. +** +** <li>[[SQLITE_FCNTL_RBU]] +** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by +** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for +** this opcode. ** </ul> */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -1195,6 +1210,8 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_COMMIT_PHASETWO 22 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 #define SQLITE_FCNTL_WAL_BLOCK 24 +#define SQLITE_FCNTL_ZIPVFS 25 +#define SQLITE_FCNTL_RBU 26 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -3597,7 +3614,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_busy(sqlite3_stmt*); ** Some interfaces require a protected sqlite3_value. Other interfaces ** will accept either a protected or an unprotected sqlite3_value. ** Every interface that accepts sqlite3_value arguments specifies -** whether or not it requires a protected sqlite3_value. +** whether or not it requires a protected sqlite3_value. The +** [sqlite3_value_dup()] interface can be used to construct a new +** protected sqlite3_value from an unprotected sqlite3_value. ** ** The terms "protected" and "unprotected" refer to whether or not ** a mutex is held. An internal mutex is held for a protected @@ -3757,6 +3776,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_text64(sqlite3_stmt*, int, const char void(*)(void*), unsigned char encoding); SQLITE_API int SQLITE_STDCALL sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n); +SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt*, int, sqlite3_uint64); /* ** CAPI3REF: Number Of SQL Parameters @@ -4100,8 +4120,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** KEYWORDS: {column access functions} ** METHOD: sqlite3_stmt ** -** These routines form the "result set" interface. -** ** ^These routines return information about a single column of the current ** result row of a query. ^In every case the first argument is a pointer ** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*] @@ -4161,13 +4179,14 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** even empty strings, are always zero-terminated. ^The return ** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer. ** -** ^The object returned by [sqlite3_column_value()] is an -** [unprotected sqlite3_value] object. An unprotected sqlite3_value object -** may only be used with [sqlite3_bind_value()] and [sqlite3_result_value()]. +** <b>Warning:</b> ^The object returned by [sqlite3_column_value()] is an +** [unprotected sqlite3_value] object. In a multithreaded environment, +** an unprotected sqlite3_value object may only be used safely with +** [sqlite3_bind_value()] and [sqlite3_result_value()]. ** If the [unprotected sqlite3_value] object returned by ** [sqlite3_column_value()] is used in any other way, including calls ** to routines like [sqlite3_value_int()], [sqlite3_value_text()], -** or [sqlite3_value_bytes()], then the behavior is undefined. +** or [sqlite3_value_bytes()], the behavior is not threadsafe. ** ** These routines attempt to convert the value where appropriate. ^For ** example, if the internal representation is FLOAT and a text result @@ -4198,12 +4217,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** </table> ** </blockquote>)^ ** -** The table above makes reference to standard C library functions atoi() -** and atof(). SQLite does not really use these functions. It has its -** own equivalent internal routines. The atoi() and atof() names are -** used in the table for brevity and because they are familiar to most -** C programmers. -** ** Note that when type conversions occur, pointers returned by prior ** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or ** sqlite3_column_text16() may be invalidated. @@ -4228,7 +4241,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** of conversion are done in place when it is possible, but sometimes they ** are not possible and in those cases prior pointers are invalidated. ** -** The safest and easiest to remember policy is to invoke these routines +** The safest policy is to invoke these routines ** in one of the following ways: ** ** <ul> @@ -4248,7 +4261,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** ^The pointers returned are valid until a type conversion occurs as ** described above, or until [sqlite3_step()] or [sqlite3_reset()] or ** [sqlite3_finalize()] is called. ^The memory space used to hold strings -** and BLOBs is freed automatically. Do <b>not</b> pass the pointers returned +** and BLOBs is freed automatically. Do <em>not</em> pass the pointers returned ** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into ** [sqlite3_free()]. ** @@ -4498,12 +4511,12 @@ SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_memory_alarm(void(*)(voi #endif /* -** CAPI3REF: Obtaining SQL Function Parameter Values +** CAPI3REF: Obtaining SQL Values ** METHOD: sqlite3_value ** ** The C-language implementation of SQL functions and aggregates uses ** this set of interface routines to access the parameter values on -** the function or aggregate. +** the function or aggregate. ** ** The xFunc (for scalar functions) or xStep (for aggregates) parameters ** to [sqlite3_create_function()] and [sqlite3_create_function16()] @@ -4556,6 +4569,23 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*); +/* +** CAPI3REF: Copy And Free SQL Values +** METHOD: sqlite3_value +** +** ^The sqlite3_value_dup(V) interface makes a copy of the [sqlite3_value] +** object D and returns a pointer to that copy. ^The [sqlite3_value] returned +** is a [protected sqlite3_value] object even if the input is not. +** ^The sqlite3_value_dup(V) interface returns NULL if V is NULL or if a +** memory allocation fails. +** +** ^The sqlite3_value_free(V) interface frees an [sqlite3_value] object +** previously obtained from [sqlite3_value_dup()]. ^If V is a NULL pointer +** then sqlite3_value_free(V) is a harmless no-op. +*/ +SQLITE_API SQLITE_EXPERIMENTAL sqlite3_value *SQLITE_STDCALL sqlite3_value_dup(const sqlite3_value*); +SQLITE_API SQLITE_EXPERIMENTAL void SQLITE_STDCALL sqlite3_value_free(sqlite3_value*); + /* ** CAPI3REF: Obtain Aggregate Function Context ** METHOD: sqlite3_context @@ -4719,9 +4749,9 @@ typedef void (*sqlite3_destructor_type)(void*); ** to by the second parameter and which is N bytes long where N is the ** third parameter. ** -** ^The sqlite3_result_zeroblob() interfaces set the result of -** the application-defined function to be a BLOB containing all zero -** bytes and N bytes in size, where N is the value of the 2nd parameter. +** ^The sqlite3_result_zeroblob(C,N) and sqlite3_result_zeroblob64(C,N) +** interfaces set the result of the application-defined function to be +** a BLOB containing all zero bytes and N bytes in size. ** ** ^The sqlite3_result_double() interface sets the result from ** an application-defined function to be a floating point value specified @@ -4803,7 +4833,7 @@ typedef void (*sqlite3_destructor_type)(void*); ** from [sqlite3_malloc()] before it returns. ** ** ^The sqlite3_result_value() interface sets the result of -** the application-defined function to be a copy the +** the application-defined function to be a copy of the ** [unprotected sqlite3_value] object specified by the 2nd parameter. ^The ** sqlite3_result_value() interface makes a copy of the [sqlite3_value] ** so that the [sqlite3_value] specified in the parameter may change or @@ -4836,6 +4866,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_text16le(sqlite3_context*, const v SQLITE_API void SQLITE_STDCALL sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*)); SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_value*); SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n); +SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n); /* ** CAPI3REF: Define New Collating Sequences @@ -6079,7 +6110,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_blob_open( ** ** ^This function sets the database handle error code and message. */ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); +SQLITE_API int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); /* ** CAPI3REF: Close A BLOB Handle @@ -6476,6 +6507,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_mutex_notheld(sqlite3_mutex*); #define SQLITE_MUTEX_STATIC_APP1 8 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP2 9 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP3 10 /* For use by application */ +#define SQLITE_MUTEX_STATIC_VFS1 11 /* For use by built-in VFS */ +#define SQLITE_MUTEX_STATIC_VFS2 12 /* For use by extension VFS */ +#define SQLITE_MUTEX_STATIC_VFS3 13 /* For use by application VFS */ /* ** CAPI3REF: Retrieve the mutex for a database connection @@ -7889,7 +7923,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vtab_on_conflict(sqlite3 *); ** ** See also: [sqlite3_stmt_scanstatus_reset()] */ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_stmt_scanstatus( +SQLITE_API int SQLITE_STDCALL sqlite3_stmt_scanstatus( sqlite3_stmt *pStmt, /* Prepared statement for which info desired */ int idx, /* Index of loop to report on */ int iScanStatusOp, /* Information desired. SQLITE_SCANSTAT_* */ @@ -7905,7 +7939,7 @@ SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_stmt_scanstatus( ** This API is only available if the library is built with pre-processor ** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined. */ -SQLITE_API SQLITE_EXPERIMENTAL void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); +SQLITE_API void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); /* @@ -8020,6 +8054,8 @@ struct sqlite3_rtree_query_info { int eParentWithin; /* Visibility of parent node */ int eWithin; /* OUT: Visiblity */ sqlite3_rtree_dbl rScore; /* OUT: Write the score here */ + /* The following fields are only available in 3.8.11 and later */ + sqlite3_value **apSqlParam; /* Original SQL values of parameters */ }; /* @@ -8325,6 +8361,20 @@ struct sqlite3_rtree_query_info { # define SQLITE_NOINLINE #endif +/* +** Make sure that the compiler intrinsics we desire are enabled when +** compiling with an appropriate version of MSVC. +*/ +#if defined(_MSC_VER) && _MSC_VER>=1300 +# if !defined(_WIN32_WCE) +# include <intrin.h> +# pragma intrinsic(_byteswap_ushort) +# pragma intrinsic(_byteswap_ulong) +# else +# include <cmnintrin.h> +# endif +#endif + /* ** The SQLITE_THREADSAFE macro must be defined as 0, 1, or 2. ** 0 means mutexes are permanently disable and the library is never @@ -8902,6 +8952,16 @@ SQLITE_PRIVATE void sqlite3HashClear(Hash*); # define SQLITE_MAX_WORKER_THREADS SQLITE_DEFAULT_WORKER_THREADS #endif +/* +** The default initial allocation for the pagecache when using separate +** pagecaches for each database connection. A positive number is the +** number of pages. A negative number N translations means that a buffer +** of -1024*N bytes is allocated and used for as many pages as it will hold. +*/ +#ifndef SQLITE_DEFAULT_PCACHE_INITSZ +# define SQLITE_DEFAULT_PCACHE_INITSZ 100 +#endif + /* ** GCC does not define the offsetof() macro so we'll have to do it @@ -9137,7 +9197,9 @@ SQLITE_PRIVATE const int sqlite3one; # if defined(__linux__) \ || defined(_WIN32) \ || (defined(__APPLE__) && defined(__MACH__)) \ - || defined(__sun) + || defined(__sun) \ + || defined(__FreeBSD__) \ + || defined(__DragonFly__) # define SQLITE_MAX_MMAP_SIZE 0x7fff0000 /* 2147418112 */ # else # define SQLITE_MAX_MMAP_SIZE 0 @@ -9667,13 +9729,14 @@ struct VdbeOp { int p1; /* First operand */ int p2; /* Second parameter (often the jump destination) */ int p3; /* The third parameter */ - union { /* fourth parameter */ + union p4union { /* fourth parameter */ int i; /* Integer value if p4type==P4_INT32 */ void *p; /* Generic pointer */ char *z; /* Pointer to data for string (char array) types */ i64 *pI64; /* Used when p4type is P4_INT64 */ double *pReal; /* Used when p4type is P4_REAL */ FuncDef *pFunc; /* Used when p4type is P4_FUNCDEF */ + sqlite3_context *pCtx; /* Used when p4type is P4_FUNCCTX */ CollSeq *pColl; /* Used when p4type is P4_COLLSEQ */ Mem *pMem; /* Used when p4type is P4_MEM */ VTable *pVtab; /* Used when p4type is P4_VTAB */ @@ -9740,6 +9803,7 @@ typedef struct VdbeOpList VdbeOpList; #define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */ #define P4_SUBPROGRAM (-18) /* P4 is a pointer to a SubProgram structure */ #define P4_ADVANCE (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */ +#define P4_FUNCCTX (-20) /* P4 is a pointer to an sqlite3_context object */ /* Error message codes for OP_Halt */ #define P5_ConstraintNotNull 1 @@ -9782,42 +9846,42 @@ typedef struct VdbeOpList VdbeOpList; /************** Begin file opcodes.h *****************************************/ /* Automatically generated. Do not edit */ /* See the mkopcodeh.awk script for details */ -#define OP_Function 1 /* synopsis: r[P3]=func(r[P2@P5]) */ -#define OP_Savepoint 2 -#define OP_AutoCommit 3 -#define OP_Transaction 4 -#define OP_SorterNext 5 -#define OP_PrevIfOpen 6 -#define OP_NextIfOpen 7 -#define OP_Prev 8 -#define OP_Next 9 -#define OP_AggStep 10 /* synopsis: accum=r[P3] step(r[P2@P5]) */ -#define OP_Checkpoint 11 -#define OP_JournalMode 12 -#define OP_Vacuum 13 -#define OP_VFilter 14 /* synopsis: iplan=r[P3] zplan='P4' */ -#define OP_VUpdate 15 /* synopsis: data=r[P3@P2] */ -#define OP_Goto 16 -#define OP_Gosub 17 -#define OP_Return 18 +#define OP_Savepoint 1 +#define OP_AutoCommit 2 +#define OP_Transaction 3 +#define OP_SorterNext 4 +#define OP_PrevIfOpen 5 +#define OP_NextIfOpen 6 +#define OP_Prev 7 +#define OP_Next 8 +#define OP_Checkpoint 9 +#define OP_JournalMode 10 +#define OP_Vacuum 11 +#define OP_VFilter 12 /* synopsis: iplan=r[P3] zplan='P4' */ +#define OP_VUpdate 13 /* synopsis: data=r[P3@P2] */ +#define OP_Goto 14 +#define OP_Gosub 15 +#define OP_Return 16 +#define OP_InitCoroutine 17 +#define OP_EndCoroutine 18 #define OP_Not 19 /* same as TK_NOT, synopsis: r[P2]= !r[P1] */ -#define OP_InitCoroutine 20 -#define OP_EndCoroutine 21 -#define OP_Yield 22 -#define OP_HaltIfNull 23 /* synopsis: if r[P3]=null halt */ -#define OP_Halt 24 -#define OP_Integer 25 /* synopsis: r[P2]=P1 */ -#define OP_Int64 26 /* synopsis: r[P2]=P4 */ -#define OP_String 27 /* synopsis: r[P2]='P4' (len=P1) */ -#define OP_Null 28 /* synopsis: r[P2..P3]=NULL */ -#define OP_SoftNull 29 /* synopsis: r[P1]=NULL */ -#define OP_Blob 30 /* synopsis: r[P2]=P4 (len=P1) */ -#define OP_Variable 31 /* synopsis: r[P2]=parameter(P1,P4) */ -#define OP_Move 32 /* synopsis: r[P2@P3]=r[P1@P3] */ -#define OP_Copy 33 /* synopsis: r[P2@P3+1]=r[P1@P3+1] */ -#define OP_SCopy 34 /* synopsis: r[P2]=r[P1] */ -#define OP_ResultRow 35 /* synopsis: output=r[P1@P2] */ -#define OP_CollSeq 36 +#define OP_Yield 20 +#define OP_HaltIfNull 21 /* synopsis: if r[P3]=null halt */ +#define OP_Halt 22 +#define OP_Integer 23 /* synopsis: r[P2]=P1 */ +#define OP_Int64 24 /* synopsis: r[P2]=P4 */ +#define OP_String 25 /* synopsis: r[P2]='P4' (len=P1) */ +#define OP_Null 26 /* synopsis: r[P2..P3]=NULL */ +#define OP_SoftNull 27 /* synopsis: r[P1]=NULL */ +#define OP_Blob 28 /* synopsis: r[P2]=P4 (len=P1) */ +#define OP_Variable 29 /* synopsis: r[P2]=parameter(P1,P4) */ +#define OP_Move 30 /* synopsis: r[P2@P3]=r[P1@P3] */ +#define OP_Copy 31 /* synopsis: r[P2@P3+1]=r[P1@P3+1] */ +#define OP_SCopy 32 /* synopsis: r[P2]=r[P1] */ +#define OP_ResultRow 33 /* synopsis: output=r[P1@P2] */ +#define OP_CollSeq 34 +#define OP_Function0 35 /* synopsis: r[P3]=func(r[P2@P5]) */ +#define OP_Function 36 /* synopsis: r[P3]=func(r[P2@P5]) */ #define OP_AddImm 37 /* synopsis: r[P1]=r[P1]+P2 */ #define OP_MustBeInt 38 #define OP_RealAffinity 39 @@ -9843,20 +9907,20 @@ typedef struct VdbeOpList VdbeOpList; #define OP_SequenceTest 59 /* synopsis: if( cursor[P1].ctr++ ) pc = P2 */ #define OP_OpenPseudo 60 /* synopsis: P3 columns in r[P2] */ #define OP_Close 61 -#define OP_SeekLT 62 /* synopsis: key=r[P3@P4] */ -#define OP_SeekLE 63 /* synopsis: key=r[P3@P4] */ -#define OP_SeekGE 64 /* synopsis: key=r[P3@P4] */ -#define OP_SeekGT 65 /* synopsis: key=r[P3@P4] */ -#define OP_Seek 66 /* synopsis: intkey=r[P2] */ -#define OP_NoConflict 67 /* synopsis: key=r[P3@P4] */ -#define OP_NotFound 68 /* synopsis: key=r[P3@P4] */ -#define OP_Found 69 /* synopsis: key=r[P3@P4] */ -#define OP_NotExists 70 /* synopsis: intkey=r[P3] */ +#define OP_ColumnsUsed 62 +#define OP_SeekLT 63 /* synopsis: key=r[P3@P4] */ +#define OP_SeekLE 64 /* synopsis: key=r[P3@P4] */ +#define OP_SeekGE 65 /* synopsis: key=r[P3@P4] */ +#define OP_SeekGT 66 /* synopsis: key=r[P3@P4] */ +#define OP_Seek 67 /* synopsis: intkey=r[P2] */ +#define OP_NoConflict 68 /* synopsis: key=r[P3@P4] */ +#define OP_NotFound 69 /* synopsis: key=r[P3@P4] */ +#define OP_Found 70 /* synopsis: key=r[P3@P4] */ #define OP_Or 71 /* same as TK_OR, synopsis: r[P3]=(r[P1] || r[P2]) */ #define OP_And 72 /* same as TK_AND, synopsis: r[P3]=(r[P1] && r[P2]) */ -#define OP_Sequence 73 /* synopsis: r[P2]=cursor[P1].ctr++ */ -#define OP_NewRowid 74 /* synopsis: r[P2]=rowid */ -#define OP_Insert 75 /* synopsis: intkey=r[P3] data=r[P2] */ +#define OP_NotExists 73 /* synopsis: intkey=r[P3] */ +#define OP_Sequence 74 /* synopsis: r[P2]=cursor[P1].ctr++ */ +#define OP_NewRowid 75 /* synopsis: r[P2]=rowid */ #define OP_IsNull 76 /* same as TK_ISNULL, synopsis: if r[P1]==NULL goto P2 */ #define OP_NotNull 77 /* same as TK_NOTNULL, synopsis: if r[P1]!=NULL goto P2 */ #define OP_Ne 78 /* same as TK_NE, synopsis: if r[P1]!=r[P3] goto P2 */ @@ -9865,7 +9929,7 @@ typedef struct VdbeOpList VdbeOpList; #define OP_Le 81 /* same as TK_LE, synopsis: if r[P1]<=r[P3] goto P2 */ #define OP_Lt 82 /* same as TK_LT, synopsis: if r[P1]<r[P3] goto P2 */ #define OP_Ge 83 /* same as TK_GE, synopsis: if r[P1]>=r[P3] goto P2 */ -#define OP_InsertInt 84 /* synopsis: intkey=P3 data=r[P2] */ +#define OP_Insert 84 /* synopsis: intkey=r[P3] data=r[P2] */ #define OP_BitAnd 85 /* same as TK_BITAND, synopsis: r[P3]=r[P1]&r[P2] */ #define OP_BitOr 86 /* same as TK_BITOR, synopsis: r[P3]=r[P1]|r[P2] */ #define OP_ShiftLeft 87 /* same as TK_LSHIFT, synopsis: r[P3]=r[P2]<<r[P1] */ @@ -9876,69 +9940,72 @@ typedef struct VdbeOpList VdbeOpList; #define OP_Divide 92 /* same as TK_SLASH, synopsis: r[P3]=r[P2]/r[P1] */ #define OP_Remainder 93 /* same as TK_REM, synopsis: r[P3]=r[P2]%r[P1] */ #define OP_Concat 94 /* same as TK_CONCAT, synopsis: r[P3]=r[P2]+r[P1] */ -#define OP_Delete 95 +#define OP_InsertInt 95 /* synopsis: intkey=P3 data=r[P2] */ #define OP_BitNot 96 /* same as TK_BITNOT, synopsis: r[P1]= ~r[P1] */ #define OP_String8 97 /* same as TK_STRING, synopsis: r[P2]='P4' */ -#define OP_ResetCount 98 -#define OP_SorterCompare 99 /* synopsis: if key(P1)!=trim(r[P3],P4) goto P2 */ -#define OP_SorterData 100 /* synopsis: r[P2]=data */ -#define OP_RowKey 101 /* synopsis: r[P2]=key */ -#define OP_RowData 102 /* synopsis: r[P2]=data */ -#define OP_Rowid 103 /* synopsis: r[P2]=rowid */ -#define OP_NullRow 104 -#define OP_Last 105 -#define OP_SorterSort 106 -#define OP_Sort 107 -#define OP_Rewind 108 -#define OP_SorterInsert 109 -#define OP_IdxInsert 110 /* synopsis: key=r[P2] */ -#define OP_IdxDelete 111 /* synopsis: key=r[P2@P3] */ -#define OP_IdxRowid 112 /* synopsis: r[P2]=rowid */ -#define OP_IdxLE 113 /* synopsis: key=r[P3@P4] */ -#define OP_IdxGT 114 /* synopsis: key=r[P3@P4] */ -#define OP_IdxLT 115 /* synopsis: key=r[P3@P4] */ -#define OP_IdxGE 116 /* synopsis: key=r[P3@P4] */ -#define OP_Destroy 117 -#define OP_Clear 118 -#define OP_ResetSorter 119 -#define OP_CreateIndex 120 /* synopsis: r[P2]=root iDb=P1 */ -#define OP_CreateTable 121 /* synopsis: r[P2]=root iDb=P1 */ -#define OP_ParseSchema 122 -#define OP_LoadAnalysis 123 -#define OP_DropTable 124 -#define OP_DropIndex 125 -#define OP_DropTrigger 126 -#define OP_IntegrityCk 127 -#define OP_RowSetAdd 128 /* synopsis: rowset(P1)=r[P2] */ -#define OP_RowSetRead 129 /* synopsis: r[P3]=rowset(P1) */ -#define OP_RowSetTest 130 /* synopsis: if r[P3] in rowset(P1) goto P2 */ -#define OP_Program 131 -#define OP_Param 132 +#define OP_Delete 98 +#define OP_ResetCount 99 +#define OP_SorterCompare 100 /* synopsis: if key(P1)!=trim(r[P3],P4) goto P2 */ +#define OP_SorterData 101 /* synopsis: r[P2]=data */ +#define OP_RowKey 102 /* synopsis: r[P2]=key */ +#define OP_RowData 103 /* synopsis: r[P2]=data */ +#define OP_Rowid 104 /* synopsis: r[P2]=rowid */ +#define OP_NullRow 105 +#define OP_Last 106 +#define OP_SorterSort 107 +#define OP_Sort 108 +#define OP_Rewind 109 +#define OP_SorterInsert 110 +#define OP_IdxInsert 111 /* synopsis: key=r[P2] */ +#define OP_IdxDelete 112 /* synopsis: key=r[P2@P3] */ +#define OP_IdxRowid 113 /* synopsis: r[P2]=rowid */ +#define OP_IdxLE 114 /* synopsis: key=r[P3@P4] */ +#define OP_IdxGT 115 /* synopsis: key=r[P3@P4] */ +#define OP_IdxLT 116 /* synopsis: key=r[P3@P4] */ +#define OP_IdxGE 117 /* synopsis: key=r[P3@P4] */ +#define OP_Destroy 118 +#define OP_Clear 119 +#define OP_ResetSorter 120 +#define OP_CreateIndex 121 /* synopsis: r[P2]=root iDb=P1 */ +#define OP_CreateTable 122 /* synopsis: r[P2]=root iDb=P1 */ +#define OP_ParseSchema 123 +#define OP_LoadAnalysis 124 +#define OP_DropTable 125 +#define OP_DropIndex 126 +#define OP_DropTrigger 127 +#define OP_IntegrityCk 128 +#define OP_RowSetAdd 129 /* synopsis: rowset(P1)=r[P2] */ +#define OP_RowSetRead 130 /* synopsis: r[P3]=rowset(P1) */ +#define OP_RowSetTest 131 /* synopsis: if r[P3] in rowset(P1) goto P2 */ +#define OP_Program 132 #define OP_Real 133 /* same as TK_FLOAT, synopsis: r[P2]=P4 */ -#define OP_FkCounter 134 /* synopsis: fkctr[P1]+=P2 */ -#define OP_FkIfZero 135 /* synopsis: if fkctr[P1]==0 goto P2 */ -#define OP_MemMax 136 /* synopsis: r[P1]=max(r[P1],r[P2]) */ -#define OP_IfPos 137 /* synopsis: if r[P1]>0 goto P2 */ -#define OP_IfNeg 138 /* synopsis: r[P1]+=P3, if r[P1]<0 goto P2 */ -#define OP_IfNotZero 139 /* synopsis: if r[P1]!=0 then r[P1]+=P3, goto P2 */ -#define OP_DecrJumpZero 140 /* synopsis: if (--r[P1])==0 goto P2 */ -#define OP_JumpZeroIncr 141 /* synopsis: if (r[P1]++)==0 ) goto P2 */ -#define OP_AggFinal 142 /* synopsis: accum=r[P1] N=P2 */ -#define OP_IncrVacuum 143 -#define OP_Expire 144 -#define OP_TableLock 145 /* synopsis: iDb=P1 root=P2 write=P3 */ -#define OP_VBegin 146 -#define OP_VCreate 147 -#define OP_VDestroy 148 -#define OP_VOpen 149 -#define OP_VColumn 150 /* synopsis: r[P3]=vcolumn(P2) */ -#define OP_VNext 151 -#define OP_VRename 152 -#define OP_Pagecount 153 -#define OP_MaxPgcnt 154 -#define OP_Init 155 /* synopsis: Start at P2 */ -#define OP_Noop 156 -#define OP_Explain 157 +#define OP_Param 134 +#define OP_FkCounter 135 /* synopsis: fkctr[P1]+=P2 */ +#define OP_FkIfZero 136 /* synopsis: if fkctr[P1]==0 goto P2 */ +#define OP_MemMax 137 /* synopsis: r[P1]=max(r[P1],r[P2]) */ +#define OP_IfPos 138 /* synopsis: if r[P1]>0 goto P2 */ +#define OP_IfNeg 139 /* synopsis: r[P1]+=P3, if r[P1]<0 goto P2 */ +#define OP_IfNotZero 140 /* synopsis: if r[P1]!=0 then r[P1]+=P3, goto P2 */ +#define OP_DecrJumpZero 141 /* synopsis: if (--r[P1])==0 goto P2 */ +#define OP_JumpZeroIncr 142 /* synopsis: if (r[P1]++)==0 ) goto P2 */ +#define OP_AggStep0 143 /* synopsis: accum=r[P3] step(r[P2@P5]) */ +#define OP_AggStep 144 /* synopsis: accum=r[P3] step(r[P2@P5]) */ +#define OP_AggFinal 145 /* synopsis: accum=r[P1] N=P2 */ +#define OP_IncrVacuum 146 +#define OP_Expire 147 +#define OP_TableLock 148 /* synopsis: iDb=P1 root=P2 write=P3 */ +#define OP_VBegin 149 +#define OP_VCreate 150 +#define OP_VDestroy 151 +#define OP_VOpen 152 +#define OP_VColumn 153 /* synopsis: r[P3]=vcolumn(P2) */ +#define OP_VNext 154 +#define OP_VRename 155 +#define OP_Pagecount 156 +#define OP_MaxPgcnt 157 +#define OP_Init 158 /* synopsis: Start at P2 */ +#define OP_Noop 159 +#define OP_Explain 160 /* Properties such as "out2" or "jump" that are specified in @@ -9952,26 +10019,27 @@ typedef struct VdbeOpList VdbeOpList; #define OPFLG_OUT2 0x0010 /* out2: P2 is an output */ #define OPFLG_OUT3 0x0020 /* out3: P3 is an output */ #define OPFLG_INITIALIZER {\ -/* 0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01,\ -/* 8 */ 0x01, 0x01, 0x00, 0x00, 0x10, 0x00, 0x01, 0x00,\ -/* 16 */ 0x01, 0x01, 0x02, 0x12, 0x01, 0x02, 0x03, 0x08,\ -/* 24 */ 0x00, 0x10, 0x10, 0x10, 0x10, 0x00, 0x10, 0x10,\ -/* 32 */ 0x00, 0x00, 0x10, 0x00, 0x00, 0x02, 0x03, 0x02,\ +/* 0 */ 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01,\ +/* 8 */ 0x01, 0x00, 0x10, 0x00, 0x01, 0x00, 0x01, 0x01,\ +/* 16 */ 0x02, 0x01, 0x02, 0x12, 0x03, 0x08, 0x00, 0x10,\ +/* 24 */ 0x10, 0x10, 0x10, 0x00, 0x10, 0x10, 0x00, 0x00,\ +/* 32 */ 0x10, 0x00, 0x00, 0x00, 0x00, 0x02, 0x03, 0x02,\ /* 40 */ 0x02, 0x00, 0x00, 0x01, 0x01, 0x03, 0x03, 0x00,\ /* 48 */ 0x00, 0x00, 0x10, 0x10, 0x08, 0x00, 0x00, 0x00,\ -/* 56 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09, 0x09,\ -/* 64 */ 0x09, 0x09, 0x04, 0x09, 0x09, 0x09, 0x09, 0x26,\ -/* 72 */ 0x26, 0x10, 0x10, 0x00, 0x03, 0x03, 0x0b, 0x0b,\ +/* 56 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,\ +/* 64 */ 0x09, 0x09, 0x09, 0x04, 0x09, 0x09, 0x09, 0x26,\ +/* 72 */ 0x26, 0x09, 0x10, 0x10, 0x03, 0x03, 0x0b, 0x0b,\ /* 80 */ 0x0b, 0x0b, 0x0b, 0x0b, 0x00, 0x26, 0x26, 0x26,\ /* 88 */ 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x26, 0x00,\ -/* 96 */ 0x12, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,\ -/* 104 */ 0x00, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x00,\ -/* 112 */ 0x10, 0x01, 0x01, 0x01, 0x01, 0x10, 0x00, 0x00,\ -/* 120 */ 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ -/* 128 */ 0x06, 0x23, 0x0b, 0x01, 0x10, 0x10, 0x00, 0x01,\ -/* 136 */ 0x04, 0x03, 0x03, 0x03, 0x03, 0x03, 0x00, 0x01,\ -/* 144 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,\ -/* 152 */ 0x00, 0x10, 0x10, 0x01, 0x00, 0x00,} +/* 96 */ 0x12, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 104 */ 0x10, 0x00, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04,\ +/* 112 */ 0x00, 0x10, 0x01, 0x01, 0x01, 0x01, 0x10, 0x00,\ +/* 120 */ 0x00, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 128 */ 0x00, 0x06, 0x23, 0x0b, 0x01, 0x10, 0x10, 0x00,\ +/* 136 */ 0x01, 0x04, 0x03, 0x03, 0x03, 0x03, 0x03, 0x00,\ +/* 144 */ 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,\ +/* 152 */ 0x00, 0x00, 0x01, 0x00, 0x10, 0x10, 0x01, 0x00,\ +/* 160 */ 0x00,} /************** End of opcodes.h *********************************************/ /************** Continuing where we left off in vdbe.h ***********************/ @@ -9986,6 +10054,7 @@ SQLITE_PRIVATE int sqlite3VdbeAddOp1(Vdbe*,int,int); SQLITE_PRIVATE int sqlite3VdbeAddOp2(Vdbe*,int,int,int); SQLITE_PRIVATE int sqlite3VdbeAddOp3(Vdbe*,int,int,int,int); SQLITE_PRIVATE int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int); +SQLITE_PRIVATE int sqlite3VdbeAddOp4Dup8(Vdbe*,int,int,int,int,const u8*,int); SQLITE_PRIVATE int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int); SQLITE_PRIVATE int sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp, int iLineno); SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe*,int,char*); @@ -10287,7 +10356,9 @@ SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager); /* Functions used to query pager state and configuration. */ SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager*); SQLITE_PRIVATE u32 sqlite3PagerDataVersion(Pager*); -SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*); +#ifdef SQLITE_DEBUG +SQLITE_PRIVATE int sqlite3PagerRefcount(Pager*); +#endif SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager*); SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager*, int); SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager*); @@ -10378,14 +10449,14 @@ struct PgHdr { }; /* Bit values for PgHdr.flags */ -#define PGHDR_DIRTY 0x002 /* Page has changed */ -#define PGHDR_NEED_SYNC 0x004 /* Fsync the rollback journal before - ** writing this page to the database */ -#define PGHDR_NEED_READ 0x008 /* Content is unread */ -#define PGHDR_REUSE_UNLIKELY 0x010 /* A hint that reuse is unlikely */ -#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ - -#define PGHDR_MMAP 0x040 /* This is an mmap page object */ +#define PGHDR_CLEAN 0x001 /* Page not on the PCache.pDirty list */ +#define PGHDR_DIRTY 0x002 /* Page is on the PCache.pDirty list */ +#define PGHDR_WRITEABLE 0x004 /* Journaled and ready to modify */ +#define PGHDR_NEED_SYNC 0x008 /* Fsync the rollback journal before + ** writing this page to the database */ +#define PGHDR_NEED_READ 0x010 /* Content is unread */ +#define PGHDR_DONT_WRITE 0x020 /* Do not write content to disk */ +#define PGHDR_MMAP 0x040 /* This is an mmap page object */ /* Initialize and shutdown the page cache subsystem */ SQLITE_PRIVATE int sqlite3PcacheInitialize(void); @@ -11169,6 +11240,7 @@ struct sqlite3 { #define SQLITE_QueryOnly 0x02000000 /* Disable database changes */ #define SQLITE_VdbeEQP 0x04000000 /* Debug EXPLAIN QUERY PLAN */ #define SQLITE_Vacuum 0x08000000 /* Currently in a VACUUM */ +#define SQLITE_CellSizeCk 0x10000000 /* Check btree cell sizes on load */ /* @@ -11416,9 +11488,9 @@ struct CollSeq { ** used as the P4 operand, they will be more readable. ** ** Note also that the numeric types are grouped together so that testing -** for a numeric type is a single comparison. And the NONE type is first. +** for a numeric type is a single comparison. And the BLOB type is first. */ -#define SQLITE_AFF_NONE 'A' +#define SQLITE_AFF_BLOB 'A' #define SQLITE_AFF_TEXT 'B' #define SQLITE_AFF_NUMERIC 'C' #define SQLITE_AFF_INTEGER 'D' @@ -11550,8 +11622,9 @@ struct Table { #define TF_HasPrimaryKey 0x04 /* Table has a primary key */ #define TF_Autoincrement 0x08 /* Integer primary key is autoincrement */ #define TF_Virtual 0x10 /* Is a virtual table */ -#define TF_WithoutRowid 0x20 /* No rowid used. PRIMARY KEY is the key */ -#define TF_OOOHidden 0x40 /* Out-of-Order hidden columns */ +#define TF_WithoutRowid 0x20 /* No rowid. PRIMARY KEY is the key */ +#define TF_NoVisibleRowid 0x40 /* No user-visible "rowid" column */ +#define TF_OOOHidden 0x80 /* Out-of-Order hidden columns */ /* @@ -11569,6 +11642,7 @@ struct Table { /* Does the table have a rowid */ #define HasRowid(X) (((X)->tabFlags & TF_WithoutRowid)==0) +#define VisibleRowid(X) (((X)->tabFlags & TF_NoVisibleRowid)==0) /* ** Each foreign key constraint is an instance of the following structure. @@ -11727,6 +11801,14 @@ struct UnpackedRecord { ** and the value of Index.onError indicate the which conflict resolution ** algorithm to employ whenever an attempt is made to insert a non-unique ** element. +** +** While parsing a CREATE TABLE or CREATE INDEX statement in order to +** generate VDBE code (as opposed to parsing one read from an sqlite_master +** table as part of parsing an existing database schema), transient instances +** of this structure may be created. In this case the Index.tnum variable is +** used to store the address of a VDBE instruction, not a database page +** number (it cannot - the database page is not allocated until the VDBE +** program is executed). See convertToWithoutRowidTable() for details. */ struct Index { char *zName; /* Name of this index */ @@ -12165,7 +12247,7 @@ struct SrcList { Expr *pOn; /* The ON clause of a join */ IdList *pUsing; /* The USING clause of a join */ Bitmask colUsed; /* Bit N (1<<N) set if column N of pTab is used */ - char *zIndex; /* Identifier from "INDEXED BY <zIndex>" clause */ + char *zIndexedBy; /* Identifier from "INDEXED BY <zIndex>" clause */ Index *pIndex; /* Index structure corresponding to zIndex, if any */ } a[1]; /* One entry for each identifier on the list */ }; @@ -12301,19 +12383,20 @@ struct Select { ** "Select Flag". */ #define SF_Distinct 0x0001 /* Output should be DISTINCT */ -#define SF_Resolved 0x0002 /* Identifiers have been resolved */ -#define SF_Aggregate 0x0004 /* Contains aggregate functions */ -#define SF_UsesEphemeral 0x0008 /* Uses the OpenEphemeral opcode */ -#define SF_Expanded 0x0010 /* sqlite3SelectExpand() called on this */ -#define SF_HasTypeInfo 0x0020 /* FROM subqueries have Table metadata */ -#define SF_Compound 0x0040 /* Part of a compound query */ -#define SF_Values 0x0080 /* Synthesized from VALUES clause */ -#define SF_MultiValue 0x0100 /* Single VALUES term with multiple rows */ -#define SF_NestedFrom 0x0200 /* Part of a parenthesized FROM clause */ -#define SF_MaybeConvert 0x0400 /* Need convertCompoundSelectToSubquery() */ -#define SF_Recursive 0x0800 /* The recursive part of a recursive CTE */ +#define SF_All 0x0002 /* Includes the ALL keyword */ +#define SF_Resolved 0x0004 /* Identifiers have been resolved */ +#define SF_Aggregate 0x0008 /* Contains aggregate functions */ +#define SF_UsesEphemeral 0x0010 /* Uses the OpenEphemeral opcode */ +#define SF_Expanded 0x0020 /* sqlite3SelectExpand() called on this */ +#define SF_HasTypeInfo 0x0040 /* FROM subqueries have Table metadata */ +#define SF_Compound 0x0080 /* Part of a compound query */ +#define SF_Values 0x0100 /* Synthesized from VALUES clause */ +#define SF_MultiValue 0x0200 /* Single VALUES term with multiple rows */ +#define SF_NestedFrom 0x0400 /* Part of a parenthesized FROM clause */ +#define SF_MaybeConvert 0x0800 /* Need convertCompoundSelectToSubquery() */ #define SF_MinMaxAgg 0x1000 /* Aggregate containing min() or max() */ -#define SF_Converted 0x2000 /* By convertCompoundSelectToSubquery() */ +#define SF_Recursive 0x2000 /* The recursive part of a recursive CTE */ +#define SF_Converted 0x4000 /* By convertCompoundSelectToSubquery() */ /* @@ -12555,7 +12638,6 @@ struct Parse { Parse *pToplevel; /* Parse structure for main program (or NULL) */ Table *pTriggerTab; /* Table triggers are being coded for */ int addrCrTab; /* Address of OP_CreateTable opcode on CREATE TABLE */ - int addrSkipPK; /* Address of instruction to skip PRIMARY KEY index */ u32 nQueryLoop; /* Est number of iterations of a query (10*log2(N)) */ u32 oldmask; /* Mask of old.* columns referenced */ u32 newmask; /* Mask of new.* columns referenced */ @@ -12971,7 +13053,9 @@ SQLITE_PRIVATE int sqlite3CantopenError(int); # define sqlite3Isxdigit(x) isxdigit((unsigned char)(x)) # define sqlite3Tolower(x) tolower((unsigned char)(x)) #endif +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS SQLITE_PRIVATE int sqlite3IsIdChar(u8); +#endif /* ** Internal function prototypes @@ -12999,7 +13083,9 @@ SQLITE_PRIVATE void sqlite3ScratchFree(void*); SQLITE_PRIVATE void *sqlite3PageMalloc(int); SQLITE_PRIVATE void sqlite3PageFree(void*); SQLITE_PRIVATE void sqlite3MemSetDefault(void); +#ifndef SQLITE_OMIT_BUILTIN_TEST SQLITE_PRIVATE void sqlite3BenignMallocHooks(void (*)(void), void (*)(void)); +#endif SQLITE_PRIVATE int sqlite3HeapNearlyFull(void); /* @@ -13067,7 +13153,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf(StrAccum*, u32, const char*, va_list); SQLITE_PRIVATE void sqlite3XPrintf(StrAccum*, u32, const char*, ...); SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3*,const char*, ...); SQLITE_PRIVATE char *sqlite3VMPrintf(sqlite3*,const char*, va_list); -SQLITE_PRIVATE char *sqlite3MAppendf(sqlite3*,char*,const char*,...); #if defined(SQLITE_DEBUG) || defined(SQLITE_HAVE_OS_TRACE) SQLITE_PRIVATE void sqlite3DebugPrintf(const char*, ...); #endif @@ -13076,17 +13161,13 @@ SQLITE_PRIVATE void *sqlite3TestTextToPtr(const char*); #endif #if defined(SQLITE_DEBUG) -SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView*,u8); -SQLITE_PRIVATE void sqlite3TreeViewPop(TreeView*); -SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView*, const char*, ...); -SQLITE_PRIVATE void sqlite3TreeViewItem(TreeView*, const char*, u8); SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView*, const Expr*, u8); SQLITE_PRIVATE void sqlite3TreeViewExprList(TreeView*, const ExprList*, u8, const char*); SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView*, const Select*, u8); #endif -SQLITE_PRIVATE void sqlite3SetString(char **, sqlite3*, const char*, ...); +SQLITE_PRIVATE void sqlite3SetString(char **, sqlite3*, const char*); SQLITE_PRIVATE void sqlite3ErrorMsg(Parse*, const char*, ...); SQLITE_PRIVATE int sqlite3Dequote(char*); SQLITE_PRIVATE int sqlite3KeywordCode(const unsigned char*, int); @@ -13144,11 +13225,14 @@ SQLITE_PRIVATE int sqlite3FaultSim(int); SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32); SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec*, u32); +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec*, u32); SQLITE_PRIVATE int sqlite3BitvecSet(Bitvec*, u32); SQLITE_PRIVATE void sqlite3BitvecClear(Bitvec*, u32, void*); SQLITE_PRIVATE void sqlite3BitvecDestroy(Bitvec*); SQLITE_PRIVATE u32 sqlite3BitvecSize(Bitvec*); +#ifndef SQLITE_OMIT_BUILTIN_TEST SQLITE_PRIVATE int sqlite3BitvecBuiltinTest(int,int*); +#endif SQLITE_PRIVATE RowSet *sqlite3RowSetInit(sqlite3*, void*, unsigned int); SQLITE_PRIVATE void sqlite3RowSetClear(RowSet*); @@ -13236,6 +13320,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeExprList(Parse*, ExprList*, int, u8); #define SQLITE_ECEL_FACTOR 0x02 /* Factor out constant terms */ SQLITE_PRIVATE void sqlite3ExprIfTrue(Parse*, Expr*, int, int); SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse*, Expr*, int, int); +SQLITE_PRIVATE void sqlite3ExprIfFalseDup(Parse*, Expr*, int, int); SQLITE_PRIVATE Table *sqlite3FindTable(sqlite3*,const char*, const char*); SQLITE_PRIVATE Table *sqlite3LocateTable(Parse*,int isView,const char*, const char*); SQLITE_PRIVATE Table *sqlite3LocateTableItem(Parse*,int isView,struct SrcList_item *); @@ -13252,8 +13337,10 @@ SQLITE_PRIVATE void sqlite3ExprAnalyzeAggregates(NameContext*, Expr*); SQLITE_PRIVATE void sqlite3ExprAnalyzeAggList(NameContext*,ExprList*); SQLITE_PRIVATE int sqlite3FunctionUsesThisSrc(Expr*, SrcList*); SQLITE_PRIVATE Vdbe *sqlite3GetVdbe(Parse*); +#ifndef SQLITE_OMIT_BUILTIN_TEST SQLITE_PRIVATE void sqlite3PrngSaveState(void); SQLITE_PRIVATE void sqlite3PrngRestoreState(void); +#endif SQLITE_PRIVATE void sqlite3RollbackAll(sqlite3*,int); SQLITE_PRIVATE void sqlite3CodeVerifySchema(Parse*, int); SQLITE_PRIVATE void sqlite3CodeVerifyNamedSchema(Parse*, const char *zDb); @@ -13471,6 +13558,7 @@ SQLITE_PRIVATE void sqlite3NestedParse(Parse*, const char*, ...); SQLITE_PRIVATE void sqlite3ExpirePreparedStatements(sqlite3*); SQLITE_PRIVATE int sqlite3CodeSubselect(Parse *, Expr *, int, int); SQLITE_PRIVATE void sqlite3SelectPrep(Parse*, Select*, NameContext*); +SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p); SQLITE_PRIVATE int sqlite3MatchSpanName(const char*, const char*, const char*, const char*); SQLITE_PRIVATE int sqlite3ResolveExprNames(NameContext*, Expr*); SQLITE_PRIVATE void sqlite3ResolveSelectNames(Parse*, Select*, NameContext*); @@ -13783,6 +13871,10 @@ SQLITE_PRIVATE int sqlite3ThreadCreate(SQLiteThread**,void*(*)(void*),void*); SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread*, void**); #endif +#if defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST) +SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3*); +#endif + #endif /* _SQLITEINT_H_ */ /************** End of sqliteInt.h *******************************************/ @@ -13801,6 +13893,7 @@ SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread*, void**); ** ** This file contains definitions of global variables and constants. */ +/* #include "sqliteInt.h" */ /* An array to map all upper-case characters into their corresponding ** lower-case character. @@ -13974,7 +14067,7 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = { 0, /* nScratch */ (void*)0, /* pPage */ 0, /* szPage */ - 0, /* nPage */ + SQLITE_DEFAULT_PCACHE_INITSZ, /* nPage */ 0, /* mxParserStack */ 0, /* sharedCacheEnabled */ SQLITE_SORTER_PMASZ, /* szPma */ @@ -14040,6 +14133,7 @@ SQLITE_PRIVATE const Token sqlite3IntTokens[] = { SQLITE_PRIVATE int sqlite3PendingByte = 0x40000000; #endif +/* #include "opcodes.h" */ /* ** Properties of opcodes. The OPFLG_INITIALIZER macro is ** created by mkopcodeh.awk during compilation. Data is obtained @@ -14068,6 +14162,7 @@ SQLITE_PRIVATE const unsigned char sqlite3OpcodeProperty[] = OPFLG_INITIALIZER; #ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS +/* #include "sqliteInt.h" */ /* ** An array of names of all compile-time options. This array should @@ -14497,6 +14592,7 @@ SQLITE_API const char *SQLITE_STDCALL sqlite3_compileoption_get(int N){ ** This module implements the sqlite3_status() interface and related ** functionality. */ +/* #include "sqliteInt.h" */ /************** Include vdbeInt.h in the middle of status.c ******************/ /************** Begin file vdbeInt.h *****************************************/ /* @@ -14584,6 +14680,9 @@ struct VdbeCursor { i64 seqCount; /* Sequence counter */ i64 movetoTarget; /* Argument to the deferred sqlite3BtreeMoveto() */ VdbeSorter *pSorter; /* Sorter object for OP_SorterOpen cursors */ +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + u64 maskUsed; /* Mask of columns used by this cursor */ +#endif /* Cached information about the header for the data record that the ** cursor is currently pointing to. Only valid if cacheStatus matches @@ -14687,6 +14786,12 @@ struct Mem { #endif }; +/* +** Size of struct Mem not including the Mem.zMalloc member or anything that +** follows. +*/ +#define MEMCELLSIZE offsetof(Mem,zMalloc) + /* One or more of the following flags are set to indicate the validOK ** representations of the value stored in the Mem struct. ** @@ -14771,14 +14876,16 @@ struct AuxData { ** (Mem) which are only defined there. */ struct sqlite3_context { - Mem *pOut; /* The return value is stored here */ - FuncDef *pFunc; /* Pointer to function information */ - Mem *pMem; /* Memory cell used to store aggregate context */ - Vdbe *pVdbe; /* The VM that owns this context */ - int iOp; /* Instruction number of OP_Function */ - int isError; /* Error code returned by the function. */ - u8 skipFlag; /* Skip accumulator loading if true */ - u8 fErrorOrAux; /* isError!=0 or pVdbe->pAuxData modified */ + Mem *pOut; /* The return value is stored here */ + FuncDef *pFunc; /* Pointer to function information */ + Mem *pMem; /* Memory cell used to store aggregate context */ + Vdbe *pVdbe; /* The VM that owns this context */ + int iOp; /* Instruction number of OP_Function */ + int isError; /* Error code returned by the function. */ + u8 skipFlag; /* Skip accumulator loading if true */ + u8 fErrorOrAux; /* isError!=0 or pVdbe->pAuxData modified */ + u8 argc; /* Number of arguments */ + sqlite3_value *argv[1]; /* Argument set */ }; /* @@ -14892,6 +14999,7 @@ struct Vdbe { /* ** Function prototypes */ +SQLITE_PRIVATE void sqlite3VdbeError(Vdbe*, const char *, ...); SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *, VdbeCursor*); void sqliteVdbePopStack(Vdbe*,int); SQLITE_PRIVATE int sqlite3VdbeCursorMoveto(VdbeCursor*); @@ -15368,6 +15476,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_db_status( ** Willmann-Bell, Inc ** Richmond, Virginia (USA) */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* #include <assert.h> */ #include <time.h> @@ -15679,7 +15788,7 @@ static void computeYMD(DateTime *p){ A = Z + 1 + A - (A/4); B = A + 1524; C = (int)((B - 122.1)/365.25); - D = (36525*C)/100; + D = (36525*(C&32767))/100; E = (int)((B-D)/30.6001); X1 = (int)(30.6001*E); p->D = B - D - X1; @@ -16480,6 +16589,7 @@ SQLITE_PRIVATE void sqlite3RegisterDateTimeFunctions(void){ ** architectures. */ #define _SQLITE_OS_C_ 1 +/* #include "sqliteInt.h" */ #undef _SQLITE_OS_C_ /* @@ -16886,6 +16996,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vfs_unregister(sqlite3_vfs *pVfs){ ** during a hash table resize is a benign fault. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_BUILTIN_TEST @@ -16967,6 +17078,7 @@ SQLITE_PRIVATE void sqlite3EndBenignMalloc(void){ ** are merely placeholders. Real drivers must be substituted using ** sqlite3_config() before SQLite will operate. */ +/* #include "sqliteInt.h" */ /* ** This version of the memory allocator is the default. It is @@ -17053,6 +17165,7 @@ SQLITE_PRIVATE void sqlite3MemSetDefault(void){ ** be necessary when compiling for Delphi, ** for example. */ +/* #include "sqliteInt.h" */ /* ** This version of the memory allocator is the default. It is @@ -17328,6 +17441,7 @@ SQLITE_PRIVATE void sqlite3MemSetDefault(void){ ** This file contains implementations of the low-level memory allocation ** routines specified in the sqlite3_mem_methods object. */ +/* #include "sqliteInt.h" */ /* ** This version of the memory allocator is used only if the @@ -17862,6 +17976,7 @@ SQLITE_PRIVATE int sqlite3MemdebugMallocCount(){ ** This version of the memory allocation subsystem is included ** in the build only if SQLITE_ENABLE_MEMSYS3 is defined. */ +/* #include "sqliteInt.h" */ /* ** This version of the memory allocator is only built into the library @@ -18576,6 +18691,7 @@ SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys3(void){ ** The sqlite3_status() logic tracks the maximum values of n and M so ** that an application can, at any time, verify this constraint. */ +/* #include "sqliteInt.h" */ /* ** This version of the memory allocator is used only when @@ -19119,6 +19235,7 @@ SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetMemsys5(void){ ** ** This file contains code that is common across all mutex implementations. */ +/* #include "sqliteInt.h" */ #if defined(SQLITE_DEBUG) && !defined(SQLITE_MUTEX_OMIT) /* @@ -19150,9 +19267,14 @@ SQLITE_PRIVATE int sqlite3MutexInit(void){ }else{ pFrom = sqlite3NoopMutex(); } - memcpy(pTo, pFrom, offsetof(sqlite3_mutex_methods, xMutexAlloc)); - memcpy(&pTo->xMutexFree, &pFrom->xMutexFree, - sizeof(*pTo) - offsetof(sqlite3_mutex_methods, xMutexFree)); + pTo->xMutexInit = pFrom->xMutexInit; + pTo->xMutexEnd = pFrom->xMutexEnd; + pTo->xMutexFree = pFrom->xMutexFree; + pTo->xMutexEnter = pFrom->xMutexEnter; + pTo->xMutexTry = pFrom->xMutexTry; + pTo->xMutexLeave = pFrom->xMutexLeave; + pTo->xMutexHeld = pFrom->xMutexHeld; + pTo->xMutexNotheld = pFrom->xMutexNotheld; pTo->xMutexAlloc = pFrom->xMutexAlloc; } rc = sqlite3GlobalConfig.mutex.xMutexInit(); @@ -19287,6 +19409,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_mutex_notheld(sqlite3_mutex *p){ ** that does error checking on mutexes to make sure they are being ** called correctly. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_MUTEX_OMIT @@ -19368,7 +19491,7 @@ static int debugMutexEnd(void){ return SQLITE_OK; } ** that means that a mutex could not be allocated. */ static sqlite3_mutex *debugMutexAlloc(int id){ - static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_APP3 - 1]; + static sqlite3_debug_mutex aStatic[SQLITE_MUTEX_STATIC_VFS3 - 1]; sqlite3_debug_mutex *pNew = 0; switch( id ){ case SQLITE_MUTEX_FAST: @@ -19490,6 +19613,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ ************************************************************************* ** This file contains the C functions that implement mutexes for pthreads */ +/* #include "sqliteInt.h" */ /* ** The code in this file is only used if we are compiling threadsafe @@ -19583,6 +19707,9 @@ static int pthreadMutexEnd(void){ return SQLITE_OK; } ** <li> SQLITE_MUTEX_STATIC_APP1 ** <li> SQLITE_MUTEX_STATIC_APP2 ** <li> SQLITE_MUTEX_STATIC_APP3 +** <li> SQLITE_MUTEX_STATIC_VFS1 +** <li> SQLITE_MUTEX_STATIC_VFS2 +** <li> SQLITE_MUTEX_STATIC_VFS3 ** </ul> ** ** The first two constants cause sqlite3_mutex_alloc() to create @@ -19619,6 +19746,9 @@ static sqlite3_mutex *pthreadMutexAlloc(int iType){ SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; sqlite3_mutex *p; @@ -19858,6 +19988,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ ************************************************************************* ** This file contains the C functions that implement mutexes for Win32. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* @@ -20225,6 +20356,9 @@ static sqlite3_mutex winMutex_staticMutexes[] = { SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, + SQLITE3_MUTEX_INITIALIZER, SQLITE3_MUTEX_INITIALIZER }; @@ -20296,6 +20430,9 @@ static int winMutexEnd(void){ ** <li> SQLITE_MUTEX_STATIC_APP1 ** <li> SQLITE_MUTEX_STATIC_APP2 ** <li> SQLITE_MUTEX_STATIC_APP3 +** <li> SQLITE_MUTEX_STATIC_VFS1 +** <li> SQLITE_MUTEX_STATIC_VFS2 +** <li> SQLITE_MUTEX_STATIC_VFS3 ** </ul> ** ** The first two constants cause sqlite3_mutex_alloc() to create @@ -20527,6 +20664,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){ ** ** Memory allocation functions used throughout sqlite. */ +/* #include "sqliteInt.h" */ /* #include <stdarg.h> */ /* @@ -20707,10 +20845,9 @@ SQLITE_PRIVATE int sqlite3MallocInit(void){ sqlite3GlobalConfig.nScratch = 0; } if( sqlite3GlobalConfig.pPage==0 || sqlite3GlobalConfig.szPage<512 - || sqlite3GlobalConfig.nPage<1 ){ + || sqlite3GlobalConfig.nPage<=0 ){ sqlite3GlobalConfig.pPage = 0; sqlite3GlobalConfig.szPage = 0; - sqlite3GlobalConfig.nPage = 0; } rc = sqlite3GlobalConfig.m.xInit(sqlite3GlobalConfig.m.pAppData); if( rc!=SQLITE_OK ) memset(&mem0, 0, sizeof(mem0)); @@ -20740,10 +20877,8 @@ SQLITE_PRIVATE void sqlite3MallocEnd(void){ ** Return the amount of memory currently checked out. */ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_used(void){ - int n, mx; - sqlite3_int64 res; - sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, 0); - res = (sqlite3_int64)n; /* Work around bug in Borland C. Ticket #3216 */ + sqlite3_int64 res, mx; + sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &res, &mx, 0); return res; } @@ -20753,11 +20888,9 @@ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_used(void){ ** or since the most recent reset. */ SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3_memory_highwater(int resetFlag){ - int n, mx; - sqlite3_int64 res; - sqlite3_status(SQLITE_STATUS_MEMORY_USED, &n, &mx, resetFlag); - res = (sqlite3_int64)mx; /* Work around bug in Borland C. Ticket #3216 */ - return res; + sqlite3_int64 res, mx; + sqlite3_status64(SQLITE_STATUS_MEMORY_USED, &res, &mx, resetFlag); + return mx; } /* @@ -21289,19 +21422,11 @@ SQLITE_PRIVATE char *sqlite3DbStrNDup(sqlite3 *db, const char *z, u64 n){ } /* -** Create a string from the zFromat argument and the va_list that follows. -** Store the string in memory obtained from sqliteMalloc() and make *pz -** point to that string. +** Free any prior content in *pz and replace it with a copy of zNew. */ -SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zFormat, ...){ - va_list ap; - char *z; - - va_start(ap, zFormat); - z = sqlite3VMPrintf(db, zFormat, ap); - va_end(ap); +SQLITE_PRIVATE void sqlite3SetString(char **pz, sqlite3 *db, const char *zNew){ sqlite3DbFree(db, *pz); - *pz = z; + *pz = sqlite3DbStrDup(db, zNew); } /* @@ -21322,17 +21447,16 @@ static SQLITE_NOINLINE int apiOomError(sqlite3 *db){ ** function. However, if a malloc() failure has occurred since the previous ** invocation SQLITE_NOMEM is returned instead. ** -** If the first argument, db, is not NULL and a malloc() error has occurred, -** then the connection error-code (the value returned by sqlite3_errcode()) -** is set to SQLITE_NOMEM. +** If an OOM as occurred, then the connection error-code (the value +** returned by sqlite3_errcode()) is set to SQLITE_NOMEM. */ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ - /* If the db handle is not NULL, then we must hold the connection handle - ** mutex here. Otherwise the read (and possible write) of db->mallocFailed + /* If the db handle must hold the connection handle mutex here. + ** Otherwise the read (and possible write) of db->mallocFailed ** is unsafe, as is the call to sqlite3Error(). */ - assert( !db || sqlite3_mutex_held(db->mutex) ); - if( db==0 ) return rc & 0xff; + assert( db!=0 ); + assert( sqlite3_mutex_held(db->mutex) ); if( db->mallocFailed || rc==SQLITE_IOERR_NOMEM ){ return apiOomError(db); } @@ -21343,18 +21467,16 @@ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){ /************** Begin file printf.c ******************************************/ /* ** The "printf" code that follows dates from the 1980's. It is in -** the public domain. The original comments are included here for -** completeness. They are very out-of-date but might be useful as -** an historical reference. Most of the "enhancements" have been backed -** out so that the functionality is now the same as standard printf(). +** the public domain. ** ************************************************************************** ** ** This file contains code for a set of "printf"-like routines. These ** routines format strings much like the printf() from the standard C ** library, though the implementation here has enhancements to support -** SQLlite. +** SQLite. */ +/* #include "sqliteInt.h" */ /* ** Conversion types fall into various categories as defined by the @@ -22273,24 +22395,6 @@ SQLITE_PRIVATE char *sqlite3MPrintf(sqlite3 *db, const char *zFormat, ...){ return z; } -/* -** Like sqlite3MPrintf(), but call sqlite3DbFree() on zStr after formatting -** the string and before returning. This routine is intended to be used -** to modify an existing string. For example: -** -** x = sqlite3MPrintf(db, x, "prefix %s suffix", x); -** -*/ -SQLITE_PRIVATE char *sqlite3MAppendf(sqlite3 *db, char *zStr, const char *zFormat, ...){ - va_list ap; - char *z; - va_start(ap, zFormat); - z = sqlite3VMPrintf(db, zFormat, ap); - va_end(ap); - sqlite3DbFree(db, zStr); - return z; -} - /* ** Print into memory obtained from sqlite3_malloc(). Omit the internal ** %-conversion extensions. @@ -22375,6 +22479,11 @@ SQLITE_API char *SQLITE_CDECL sqlite3_snprintf(int n, char *zBuf, const char *zF ** sqlite3_log() must render into a static buffer. It cannot dynamically ** allocate memory because it might be called while the memory allocator ** mutex is held. +** +** sqlite3VXPrintf() might ask for *temporary* memory allocations for +** certain format characters (%q) or for very large precisions or widths. +** Care must be taken that any sqlite3_log() calls that occur while the +** memory mutex is held do not use these mechanisms. */ static void renderLogMsg(int iErrCode, const char *zFormat, va_list ap){ StrAccum acc; /* String accumulator */ @@ -22418,22 +22527,46 @@ SQLITE_PRIVATE void sqlite3DebugPrintf(const char *zFormat, ...){ } #endif -#ifdef SQLITE_DEBUG -/************************************************************************* -** Routines for implementing the "TreeView" display of hierarchical -** data structures for debugging. + +/* +** variable-argument wrapper around sqlite3VXPrintf(). +*/ +SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ + va_list ap; + va_start(ap,zFormat); + sqlite3VXPrintf(p, bFlags, zFormat, ap); + va_end(ap); +} + +/************** End of printf.c **********************************************/ +/************** Begin file treeview.c ****************************************/ +/* +** 2015-06-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. ** -** The main entry points (coded elsewhere) are: -** sqlite3TreeViewExpr(0, pExpr, 0); -** sqlite3TreeViewExprList(0, pList, 0, 0); -** sqlite3TreeViewSelect(0, pSelect, 0); -** Insert calls to those routines while debugging in order to display -** a diagram of Expr, ExprList, and Select objects. +************************************************************************* ** +** This file contains C code to implement the TreeView debugging routines. +** These routines print a parse tree to standard output for debugging and +** analysis. +** +** The interfaces in this file is only available when compiling +** with SQLITE_DEBUG. +*/ +/* #include "sqliteInt.h" */ +#ifdef SQLITE_DEBUG + +/* +** Add a new subitem to the tree. The moreToFollow flag indicates that this +** is not the last item in the tree. */ -/* Add a new subitem to the tree. The moreToFollow flag indicates that this -** is not the last item in the tree. */ -SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ +static TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ if( p==0 ){ p = sqlite3_malloc64( sizeof(*p) ); if( p==0 ) return 0; @@ -22445,15 +22578,21 @@ SQLITE_PRIVATE TreeView *sqlite3TreeViewPush(TreeView *p, u8 moreToFollow){ if( p->iLevel<sizeof(p->bLine) ) p->bLine[p->iLevel] = moreToFollow; return p; } -/* Finished with one layer of the tree */ -SQLITE_PRIVATE void sqlite3TreeViewPop(TreeView *p){ + +/* +** Finished with one layer of the tree +*/ +static void sqlite3TreeViewPop(TreeView *p){ if( p==0 ) return; p->iLevel--; if( p->iLevel<0 ) sqlite3_free(p); } -/* Generate a single line of output for the tree, with a prefix that contains -** all the appropriate tree lines */ -SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ + +/* +** Generate a single line of output for the tree, with a prefix that contains +** all the appropriate tree lines +*/ +static void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ va_list ap; int i; StrAccum acc; @@ -22473,24 +22612,367 @@ SQLITE_PRIVATE void sqlite3TreeViewLine(TreeView *p, const char *zFormat, ...){ fprintf(stdout,"%s", zBuf); fflush(stdout); } -/* Shorthand for starting a new tree item that consists of a single label */ -SQLITE_PRIVATE void sqlite3TreeViewItem(TreeView *p, const char *zLabel, u8 moreToFollow){ - p = sqlite3TreeViewPush(p, moreToFollow); + +/* +** Shorthand for starting a new tree item that consists of a single label +*/ +static void sqlite3TreeViewItem(TreeView *p, const char *zLabel,u8 moreFollows){ + p = sqlite3TreeViewPush(p, moreFollows); sqlite3TreeViewLine(p, "%s", zLabel); } -#endif /* SQLITE_DEBUG */ + /* -** variable-argument wrapper around sqlite3VXPrintf(). +** Generate a human-readable description of a the Select object. */ -SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ...){ - va_list ap; - va_start(ap,zFormat); - sqlite3VXPrintf(p, bFlags, zFormat, ap); - va_end(ap); +SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){ + int n = 0; + pView = sqlite3TreeViewPush(pView, moreToFollow); + sqlite3TreeViewLine(pView, "SELECT%s%s (0x%p) selFlags=0x%x", + ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""), + ((p->selFlags & SF_Aggregate) ? " agg_flag" : ""), p, p->selFlags + ); + if( p->pSrc && p->pSrc->nSrc ) n++; + if( p->pWhere ) n++; + if( p->pGroupBy ) n++; + if( p->pHaving ) n++; + if( p->pOrderBy ) n++; + if( p->pLimit ) n++; + if( p->pOffset ) n++; + if( p->pPrior ) n++; + sqlite3TreeViewExprList(pView, p->pEList, (n--)>0, "result-set"); + if( p->pSrc && p->pSrc->nSrc ){ + int i; + pView = sqlite3TreeViewPush(pView, (n--)>0); + sqlite3TreeViewLine(pView, "FROM"); + for(i=0; i<p->pSrc->nSrc; i++){ + struct SrcList_item *pItem = &p->pSrc->a[i]; + StrAccum x; + char zLine[100]; + sqlite3StrAccumInit(&x, 0, zLine, sizeof(zLine), 0); + sqlite3XPrintf(&x, 0, "{%d,*}", pItem->iCursor); + if( pItem->zDatabase ){ + sqlite3XPrintf(&x, 0, " %s.%s", pItem->zDatabase, pItem->zName); + }else if( pItem->zName ){ + sqlite3XPrintf(&x, 0, " %s", pItem->zName); + } + if( pItem->pTab ){ + sqlite3XPrintf(&x, 0, " tabname=%Q", pItem->pTab->zName); + } + if( pItem->zAlias ){ + sqlite3XPrintf(&x, 0, " (AS %s)", pItem->zAlias); + } + if( pItem->jointype & JT_LEFT ){ + sqlite3XPrintf(&x, 0, " LEFT-JOIN"); + } + sqlite3StrAccumFinish(&x); + sqlite3TreeViewItem(pView, zLine, i<p->pSrc->nSrc-1); + if( pItem->pSelect ){ + sqlite3TreeViewSelect(pView, pItem->pSelect, 0); + } + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); + } + if( p->pWhere ){ + sqlite3TreeViewItem(pView, "WHERE", (n--)>0); + sqlite3TreeViewExpr(pView, p->pWhere, 0); + sqlite3TreeViewPop(pView); + } + if( p->pGroupBy ){ + sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY"); + } + if( p->pHaving ){ + sqlite3TreeViewItem(pView, "HAVING", (n--)>0); + sqlite3TreeViewExpr(pView, p->pHaving, 0); + sqlite3TreeViewPop(pView); + } + if( p->pOrderBy ){ + sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY"); + } + if( p->pLimit ){ + sqlite3TreeViewItem(pView, "LIMIT", (n--)>0); + sqlite3TreeViewExpr(pView, p->pLimit, 0); + sqlite3TreeViewPop(pView); + } + if( p->pOffset ){ + sqlite3TreeViewItem(pView, "OFFSET", (n--)>0); + sqlite3TreeViewExpr(pView, p->pOffset, 0); + sqlite3TreeViewPop(pView); + } + if( p->pPrior ){ + const char *zOp = "UNION"; + switch( p->op ){ + case TK_ALL: zOp = "UNION ALL"; break; + case TK_INTERSECT: zOp = "INTERSECT"; break; + case TK_EXCEPT: zOp = "EXCEPT"; break; + } + sqlite3TreeViewItem(pView, zOp, (n--)>0); + sqlite3TreeViewSelect(pView, p->pPrior, 0); + sqlite3TreeViewPop(pView); + } + sqlite3TreeViewPop(pView); } -/************** End of printf.c **********************************************/ +/* +** Generate a human-readable explanation of an expression tree. +*/ +SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){ + const char *zBinOp = 0; /* Binary operator */ + const char *zUniOp = 0; /* Unary operator */ + char zFlgs[30]; + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( pExpr==0 ){ + sqlite3TreeViewLine(pView, "nil"); + sqlite3TreeViewPop(pView); + return; + } + if( pExpr->flags ){ + sqlite3_snprintf(sizeof(zFlgs),zFlgs," flags=0x%x",pExpr->flags); + }else{ + zFlgs[0] = 0; + } + switch( pExpr->op ){ + case TK_AGG_COLUMN: { + sqlite3TreeViewLine(pView, "AGG{%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + break; + } + case TK_COLUMN: { + if( pExpr->iTable<0 ){ + /* This only happens when coding check constraints */ + sqlite3TreeViewLine(pView, "COLUMN(%d)%s", pExpr->iColumn, zFlgs); + }else{ + sqlite3TreeViewLine(pView, "{%d:%d}%s", + pExpr->iTable, pExpr->iColumn, zFlgs); + } + break; + } + case TK_INTEGER: { + if( pExpr->flags & EP_IntValue ){ + sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue); + }else{ + sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken); + } + break; + } +#ifndef SQLITE_OMIT_FLOATING_POINT + case TK_FLOAT: { + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_STRING: { + sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken); + break; + } + case TK_NULL: { + sqlite3TreeViewLine(pView,"NULL"); + break; + } +#ifndef SQLITE_OMIT_BLOB_LITERAL + case TK_BLOB: { + sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); + break; + } +#endif + case TK_VARIABLE: { + sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)", + pExpr->u.zToken, pExpr->iColumn); + break; + } + case TK_REGISTER: { + sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); + break; + } + case TK_AS: { + sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + case TK_ID: { + sqlite3TreeViewLine(pView,"ID \"%w\"", pExpr->u.zToken); + break; + } +#ifndef SQLITE_OMIT_CAST + case TK_CAST: { + /* Expressions of the form: CAST(pLeft AS token) */ + sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } +#endif /* SQLITE_OMIT_CAST */ + case TK_LT: zBinOp = "LT"; break; + case TK_LE: zBinOp = "LE"; break; + case TK_GT: zBinOp = "GT"; break; + case TK_GE: zBinOp = "GE"; break; + case TK_NE: zBinOp = "NE"; break; + case TK_EQ: zBinOp = "EQ"; break; + case TK_IS: zBinOp = "IS"; break; + case TK_ISNOT: zBinOp = "ISNOT"; break; + case TK_AND: zBinOp = "AND"; break; + case TK_OR: zBinOp = "OR"; break; + case TK_PLUS: zBinOp = "ADD"; break; + case TK_STAR: zBinOp = "MUL"; break; + case TK_MINUS: zBinOp = "SUB"; break; + case TK_REM: zBinOp = "REM"; break; + case TK_BITAND: zBinOp = "BITAND"; break; + case TK_BITOR: zBinOp = "BITOR"; break; + case TK_SLASH: zBinOp = "DIV"; break; + case TK_LSHIFT: zBinOp = "LSHIFT"; break; + case TK_RSHIFT: zBinOp = "RSHIFT"; break; + case TK_CONCAT: zBinOp = "CONCAT"; break; + case TK_DOT: zBinOp = "DOT"; break; + + case TK_UMINUS: zUniOp = "UMINUS"; break; + case TK_UPLUS: zUniOp = "UPLUS"; break; + case TK_BITNOT: zUniOp = "BITNOT"; break; + case TK_NOT: zUniOp = "NOT"; break; + case TK_ISNULL: zUniOp = "ISNULL"; break; + case TK_NOTNULL: zUniOp = "NOTNULL"; break; + + case TK_COLLATE: { + sqlite3TreeViewLine(pView, "COLLATE %Q", pExpr->u.zToken); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + break; + } + + case TK_AGG_FUNCTION: + case TK_FUNCTION: { + ExprList *pFarg; /* List of function arguments */ + if( ExprHasProperty(pExpr, EP_TokenOnly) ){ + pFarg = 0; + }else{ + pFarg = pExpr->x.pList; + } + if( pExpr->op==TK_AGG_FUNCTION ){ + sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q", + pExpr->op2, pExpr->u.zToken); + }else{ + sqlite3TreeViewLine(pView, "FUNCTION %Q", pExpr->u.zToken); + } + if( pFarg ){ + sqlite3TreeViewExprList(pView, pFarg, 0, 0); + } + break; + } +#ifndef SQLITE_OMIT_SUBQUERY + case TK_EXISTS: { + sqlite3TreeViewLine(pView, "EXISTS-expr"); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_SELECT: { + sqlite3TreeViewLine(pView, "SELECT-expr"); + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + break; + } + case TK_IN: { + sqlite3TreeViewLine(pView, "IN"); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + if( ExprHasProperty(pExpr, EP_xIsSelect) ){ + sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); + }else{ + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + } + break; + } +#endif /* SQLITE_OMIT_SUBQUERY */ + + /* + ** x BETWEEN y AND z + ** + ** This is equivalent to + ** + ** x>=y AND x<=z + ** + ** X is stored in pExpr->pLeft. + ** Y is stored in pExpr->pList->a[0].pExpr. + ** Z is stored in pExpr->pList->a[1].pExpr. + */ + case TK_BETWEEN: { + Expr *pX = pExpr->pLeft; + Expr *pY = pExpr->x.pList->a[0].pExpr; + Expr *pZ = pExpr->x.pList->a[1].pExpr; + sqlite3TreeViewLine(pView, "BETWEEN"); + sqlite3TreeViewExpr(pView, pX, 1); + sqlite3TreeViewExpr(pView, pY, 1); + sqlite3TreeViewExpr(pView, pZ, 0); + break; + } + case TK_TRIGGER: { + /* If the opcode is TK_TRIGGER, then the expression is a reference + ** to a column in the new.* or old.* pseudo-tables available to + ** trigger programs. In this case Expr.iTable is set to 1 for the + ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn + ** is set to the column of the pseudo-table to read, or to -1 to + ** read the rowid field. + */ + sqlite3TreeViewLine(pView, "%s(%d)", + pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn); + break; + } + case TK_CASE: { + sqlite3TreeViewLine(pView, "CASE"); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); + break; + } +#ifndef SQLITE_OMIT_TRIGGER + case TK_RAISE: { + const char *zType = "unk"; + switch( pExpr->affinity ){ + case OE_Rollback: zType = "rollback"; break; + case OE_Abort: zType = "abort"; break; + case OE_Fail: zType = "fail"; break; + case OE_Ignore: zType = "ignore"; break; + } + sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken); + break; + } +#endif + default: { + sqlite3TreeViewLine(pView, "op=%d", pExpr->op); + break; + } + } + if( zBinOp ){ + sqlite3TreeViewLine(pView, "%s%s", zBinOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); + sqlite3TreeViewExpr(pView, pExpr->pRight, 0); + }else if( zUniOp ){ + sqlite3TreeViewLine(pView, "%s%s", zUniOp, zFlgs); + sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); + } + sqlite3TreeViewPop(pView); +} + +/* +** Generate a human-readable explanation of an expression list. +*/ +SQLITE_PRIVATE void sqlite3TreeViewExprList( + TreeView *pView, + const ExprList *pList, + u8 moreToFollow, + const char *zLabel +){ + int i; + pView = sqlite3TreeViewPush(pView, moreToFollow); + if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST"; + if( pList==0 ){ + sqlite3TreeViewLine(pView, "%s (empty)", zLabel); + }else{ + sqlite3TreeViewLine(pView, "%s", zLabel); + for(i=0; i<pList->nExpr; i++){ + sqlite3TreeViewExpr(pView, pList->a[i].pExpr, i<pList->nExpr-1); + } + } + sqlite3TreeViewPop(pView); +} + +#endif /* SQLITE_DEBUG */ + +/************** End of treeview.c ********************************************/ /************** Begin file random.c ******************************************/ /* ** 2001 September 15 @@ -22509,6 +22991,7 @@ SQLITE_PRIVATE void sqlite3XPrintf(StrAccum *p, u32 bFlags, const char *zFormat, ** Random numbers are used by some of the database backends in order ** to generate random integer keys for tables or random filenames. */ +/* #include "sqliteInt.h" */ /* All threads share a single random number generator. @@ -22655,7 +23138,9 @@ SQLITE_PRIVATE void sqlite3PrngRestoreState(void){ ** of multiple cores can do so, while also allowing applications to stay ** single-threaded if desired. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN +/* # include "os_win.h" */ #endif #if SQLITE_MAX_WORKER_THREADS>0 @@ -22929,7 +23414,9 @@ SQLITE_PRIVATE int sqlite3ThreadJoin(SQLiteThread *p, void **ppOut){ ** 0xfe 0xff big-endian utf-16 follows ** */ +/* #include "sqliteInt.h" */ /* #include <assert.h> */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_AMALGAMATION /* @@ -23442,6 +23929,7 @@ SQLITE_PRIVATE void sqlite3UtfSelfTest(void){ ** strings, and stuff like that. ** */ +/* #include "sqliteInt.h" */ /* #include <stdarg.h> */ #if HAVE_ISNAN || SQLITE_HAVE_ISNAN # include <math.h> @@ -23531,10 +24019,8 @@ SQLITE_PRIVATE int sqlite3IsNaN(double x){ ** than 1GiB) the value returned might be less than the true string length. */ SQLITE_PRIVATE int sqlite3Strlen30(const char *z){ - const char *z2 = z; if( z==0 ) return 0; - while( *z2 ){ z2++; } - return 0x3fffffff & (int)(z2 - z); + return 0x3fffffff & (int)strlen(z); } /* @@ -24506,14 +24992,38 @@ SQLITE_PRIVATE int sqlite3VarintLen(u64 v){ ** Read or write a four-byte big-endian integer value. */ SQLITE_PRIVATE u32 sqlite3Get4byte(const u8 *p){ +#if SQLITE_BYTEORDER==4321 + u32 x; + memcpy(&x,p,4); + return x; +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x; + memcpy(&x,p,4); + return __builtin_bswap32(x); +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x; + memcpy(&x,p,4); + return _byteswap_ulong(x); +#else testcase( p[0]&0x80 ); return ((unsigned)p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3]; +#endif } SQLITE_PRIVATE void sqlite3Put4byte(unsigned char *p, u32 v){ +#if SQLITE_BYTEORDER==4321 + memcpy(p,&v,4); +#elif SQLITE_BYTEORDER==1234 && defined(__GNUC__) && GCC_VERSION>=4003000 + u32 x = __builtin_bswap32(v); + memcpy(p,&x,4); +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 + u32 x = _byteswap_ulong(v); + memcpy(p,&x,4); +#else p[0] = (u8)(v>>24); p[1] = (u8)(v>>16); p[2] = (u8)(v>>8); p[3] = (u8)v; +#endif } @@ -24816,6 +25326,7 @@ SQLITE_PRIVATE u64 sqlite3LogEstToInt(LogEst x){ ** This is the implementation of generic hash-tables ** used in SQLite. */ +/* #include "sqliteInt.h" */ /* #include <assert.h> */ /* Turn bulk memory into a hash table object by initializing the @@ -25081,42 +25592,42 @@ SQLITE_PRIVATE void *sqlite3HashInsert(Hash *pH, const char *pKey, void *data){ #endif SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ static const char *const azName[] = { "?", - /* 1 */ "Function" OpHelp("r[P3]=func(r[P2@P5])"), - /* 2 */ "Savepoint" OpHelp(""), - /* 3 */ "AutoCommit" OpHelp(""), - /* 4 */ "Transaction" OpHelp(""), - /* 5 */ "SorterNext" OpHelp(""), - /* 6 */ "PrevIfOpen" OpHelp(""), - /* 7 */ "NextIfOpen" OpHelp(""), - /* 8 */ "Prev" OpHelp(""), - /* 9 */ "Next" OpHelp(""), - /* 10 */ "AggStep" OpHelp("accum=r[P3] step(r[P2@P5])"), - /* 11 */ "Checkpoint" OpHelp(""), - /* 12 */ "JournalMode" OpHelp(""), - /* 13 */ "Vacuum" OpHelp(""), - /* 14 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), - /* 15 */ "VUpdate" OpHelp("data=r[P3@P2]"), - /* 16 */ "Goto" OpHelp(""), - /* 17 */ "Gosub" OpHelp(""), - /* 18 */ "Return" OpHelp(""), + /* 1 */ "Savepoint" OpHelp(""), + /* 2 */ "AutoCommit" OpHelp(""), + /* 3 */ "Transaction" OpHelp(""), + /* 4 */ "SorterNext" OpHelp(""), + /* 5 */ "PrevIfOpen" OpHelp(""), + /* 6 */ "NextIfOpen" OpHelp(""), + /* 7 */ "Prev" OpHelp(""), + /* 8 */ "Next" OpHelp(""), + /* 9 */ "Checkpoint" OpHelp(""), + /* 10 */ "JournalMode" OpHelp(""), + /* 11 */ "Vacuum" OpHelp(""), + /* 12 */ "VFilter" OpHelp("iplan=r[P3] zplan='P4'"), + /* 13 */ "VUpdate" OpHelp("data=r[P3@P2]"), + /* 14 */ "Goto" OpHelp(""), + /* 15 */ "Gosub" OpHelp(""), + /* 16 */ "Return" OpHelp(""), + /* 17 */ "InitCoroutine" OpHelp(""), + /* 18 */ "EndCoroutine" OpHelp(""), /* 19 */ "Not" OpHelp("r[P2]= !r[P1]"), - /* 20 */ "InitCoroutine" OpHelp(""), - /* 21 */ "EndCoroutine" OpHelp(""), - /* 22 */ "Yield" OpHelp(""), - /* 23 */ "HaltIfNull" OpHelp("if r[P3]=null halt"), - /* 24 */ "Halt" OpHelp(""), - /* 25 */ "Integer" OpHelp("r[P2]=P1"), - /* 26 */ "Int64" OpHelp("r[P2]=P4"), - /* 27 */ "String" OpHelp("r[P2]='P4' (len=P1)"), - /* 28 */ "Null" OpHelp("r[P2..P3]=NULL"), - /* 29 */ "SoftNull" OpHelp("r[P1]=NULL"), - /* 30 */ "Blob" OpHelp("r[P2]=P4 (len=P1)"), - /* 31 */ "Variable" OpHelp("r[P2]=parameter(P1,P4)"), - /* 32 */ "Move" OpHelp("r[P2@P3]=r[P1@P3]"), - /* 33 */ "Copy" OpHelp("r[P2@P3+1]=r[P1@P3+1]"), - /* 34 */ "SCopy" OpHelp("r[P2]=r[P1]"), - /* 35 */ "ResultRow" OpHelp("output=r[P1@P2]"), - /* 36 */ "CollSeq" OpHelp(""), + /* 20 */ "Yield" OpHelp(""), + /* 21 */ "HaltIfNull" OpHelp("if r[P3]=null halt"), + /* 22 */ "Halt" OpHelp(""), + /* 23 */ "Integer" OpHelp("r[P2]=P1"), + /* 24 */ "Int64" OpHelp("r[P2]=P4"), + /* 25 */ "String" OpHelp("r[P2]='P4' (len=P1)"), + /* 26 */ "Null" OpHelp("r[P2..P3]=NULL"), + /* 27 */ "SoftNull" OpHelp("r[P1]=NULL"), + /* 28 */ "Blob" OpHelp("r[P2]=P4 (len=P1)"), + /* 29 */ "Variable" OpHelp("r[P2]=parameter(P1,P4)"), + /* 30 */ "Move" OpHelp("r[P2@P3]=r[P1@P3]"), + /* 31 */ "Copy" OpHelp("r[P2@P3+1]=r[P1@P3+1]"), + /* 32 */ "SCopy" OpHelp("r[P2]=r[P1]"), + /* 33 */ "ResultRow" OpHelp("output=r[P1@P2]"), + /* 34 */ "CollSeq" OpHelp(""), + /* 35 */ "Function0" OpHelp("r[P3]=func(r[P2@P5])"), + /* 36 */ "Function" OpHelp("r[P3]=func(r[P2@P5])"), /* 37 */ "AddImm" OpHelp("r[P1]=r[P1]+P2"), /* 38 */ "MustBeInt" OpHelp(""), /* 39 */ "RealAffinity" OpHelp(""), @@ -25142,20 +25653,20 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 59 */ "SequenceTest" OpHelp("if( cursor[P1].ctr++ ) pc = P2"), /* 60 */ "OpenPseudo" OpHelp("P3 columns in r[P2]"), /* 61 */ "Close" OpHelp(""), - /* 62 */ "SeekLT" OpHelp("key=r[P3@P4]"), - /* 63 */ "SeekLE" OpHelp("key=r[P3@P4]"), - /* 64 */ "SeekGE" OpHelp("key=r[P3@P4]"), - /* 65 */ "SeekGT" OpHelp("key=r[P3@P4]"), - /* 66 */ "Seek" OpHelp("intkey=r[P2]"), - /* 67 */ "NoConflict" OpHelp("key=r[P3@P4]"), - /* 68 */ "NotFound" OpHelp("key=r[P3@P4]"), - /* 69 */ "Found" OpHelp("key=r[P3@P4]"), - /* 70 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 62 */ "ColumnsUsed" OpHelp(""), + /* 63 */ "SeekLT" OpHelp("key=r[P3@P4]"), + /* 64 */ "SeekLE" OpHelp("key=r[P3@P4]"), + /* 65 */ "SeekGE" OpHelp("key=r[P3@P4]"), + /* 66 */ "SeekGT" OpHelp("key=r[P3@P4]"), + /* 67 */ "Seek" OpHelp("intkey=r[P2]"), + /* 68 */ "NoConflict" OpHelp("key=r[P3@P4]"), + /* 69 */ "NotFound" OpHelp("key=r[P3@P4]"), + /* 70 */ "Found" OpHelp("key=r[P3@P4]"), /* 71 */ "Or" OpHelp("r[P3]=(r[P1] || r[P2])"), /* 72 */ "And" OpHelp("r[P3]=(r[P1] && r[P2])"), - /* 73 */ "Sequence" OpHelp("r[P2]=cursor[P1].ctr++"), - /* 74 */ "NewRowid" OpHelp("r[P2]=rowid"), - /* 75 */ "Insert" OpHelp("intkey=r[P3] data=r[P2]"), + /* 73 */ "NotExists" OpHelp("intkey=r[P3]"), + /* 74 */ "Sequence" OpHelp("r[P2]=cursor[P1].ctr++"), + /* 75 */ "NewRowid" OpHelp("r[P2]=rowid"), /* 76 */ "IsNull" OpHelp("if r[P1]==NULL goto P2"), /* 77 */ "NotNull" OpHelp("if r[P1]!=NULL goto P2"), /* 78 */ "Ne" OpHelp("if r[P1]!=r[P3] goto P2"), @@ -25164,7 +25675,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 81 */ "Le" OpHelp("if r[P1]<=r[P3] goto P2"), /* 82 */ "Lt" OpHelp("if r[P1]<r[P3] goto P2"), /* 83 */ "Ge" OpHelp("if r[P1]>=r[P3] goto P2"), - /* 84 */ "InsertInt" OpHelp("intkey=P3 data=r[P2]"), + /* 84 */ "Insert" OpHelp("intkey=r[P3] data=r[P2]"), /* 85 */ "BitAnd" OpHelp("r[P3]=r[P1]&r[P2]"), /* 86 */ "BitOr" OpHelp("r[P3]=r[P1]|r[P2]"), /* 87 */ "ShiftLeft" OpHelp("r[P3]=r[P2]<<r[P1]"), @@ -25175,69 +25686,72 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ /* 92 */ "Divide" OpHelp("r[P3]=r[P2]/r[P1]"), /* 93 */ "Remainder" OpHelp("r[P3]=r[P2]%r[P1]"), /* 94 */ "Concat" OpHelp("r[P3]=r[P2]+r[P1]"), - /* 95 */ "Delete" OpHelp(""), + /* 95 */ "InsertInt" OpHelp("intkey=P3 data=r[P2]"), /* 96 */ "BitNot" OpHelp("r[P1]= ~r[P1]"), /* 97 */ "String8" OpHelp("r[P2]='P4'"), - /* 98 */ "ResetCount" OpHelp(""), - /* 99 */ "SorterCompare" OpHelp("if key(P1)!=trim(r[P3],P4) goto P2"), - /* 100 */ "SorterData" OpHelp("r[P2]=data"), - /* 101 */ "RowKey" OpHelp("r[P2]=key"), - /* 102 */ "RowData" OpHelp("r[P2]=data"), - /* 103 */ "Rowid" OpHelp("r[P2]=rowid"), - /* 104 */ "NullRow" OpHelp(""), - /* 105 */ "Last" OpHelp(""), - /* 106 */ "SorterSort" OpHelp(""), - /* 107 */ "Sort" OpHelp(""), - /* 108 */ "Rewind" OpHelp(""), - /* 109 */ "SorterInsert" OpHelp(""), - /* 110 */ "IdxInsert" OpHelp("key=r[P2]"), - /* 111 */ "IdxDelete" OpHelp("key=r[P2@P3]"), - /* 112 */ "IdxRowid" OpHelp("r[P2]=rowid"), - /* 113 */ "IdxLE" OpHelp("key=r[P3@P4]"), - /* 114 */ "IdxGT" OpHelp("key=r[P3@P4]"), - /* 115 */ "IdxLT" OpHelp("key=r[P3@P4]"), - /* 116 */ "IdxGE" OpHelp("key=r[P3@P4]"), - /* 117 */ "Destroy" OpHelp(""), - /* 118 */ "Clear" OpHelp(""), - /* 119 */ "ResetSorter" OpHelp(""), - /* 120 */ "CreateIndex" OpHelp("r[P2]=root iDb=P1"), - /* 121 */ "CreateTable" OpHelp("r[P2]=root iDb=P1"), - /* 122 */ "ParseSchema" OpHelp(""), - /* 123 */ "LoadAnalysis" OpHelp(""), - /* 124 */ "DropTable" OpHelp(""), - /* 125 */ "DropIndex" OpHelp(""), - /* 126 */ "DropTrigger" OpHelp(""), - /* 127 */ "IntegrityCk" OpHelp(""), - /* 128 */ "RowSetAdd" OpHelp("rowset(P1)=r[P2]"), - /* 129 */ "RowSetRead" OpHelp("r[P3]=rowset(P1)"), - /* 130 */ "RowSetTest" OpHelp("if r[P3] in rowset(P1) goto P2"), - /* 131 */ "Program" OpHelp(""), - /* 132 */ "Param" OpHelp(""), + /* 98 */ "Delete" OpHelp(""), + /* 99 */ "ResetCount" OpHelp(""), + /* 100 */ "SorterCompare" OpHelp("if key(P1)!=trim(r[P3],P4) goto P2"), + /* 101 */ "SorterData" OpHelp("r[P2]=data"), + /* 102 */ "RowKey" OpHelp("r[P2]=key"), + /* 103 */ "RowData" OpHelp("r[P2]=data"), + /* 104 */ "Rowid" OpHelp("r[P2]=rowid"), + /* 105 */ "NullRow" OpHelp(""), + /* 106 */ "Last" OpHelp(""), + /* 107 */ "SorterSort" OpHelp(""), + /* 108 */ "Sort" OpHelp(""), + /* 109 */ "Rewind" OpHelp(""), + /* 110 */ "SorterInsert" OpHelp(""), + /* 111 */ "IdxInsert" OpHelp("key=r[P2]"), + /* 112 */ "IdxDelete" OpHelp("key=r[P2@P3]"), + /* 113 */ "IdxRowid" OpHelp("r[P2]=rowid"), + /* 114 */ "IdxLE" OpHelp("key=r[P3@P4]"), + /* 115 */ "IdxGT" OpHelp("key=r[P3@P4]"), + /* 116 */ "IdxLT" OpHelp("key=r[P3@P4]"), + /* 117 */ "IdxGE" OpHelp("key=r[P3@P4]"), + /* 118 */ "Destroy" OpHelp(""), + /* 119 */ "Clear" OpHelp(""), + /* 120 */ "ResetSorter" OpHelp(""), + /* 121 */ "CreateIndex" OpHelp("r[P2]=root iDb=P1"), + /* 122 */ "CreateTable" OpHelp("r[P2]=root iDb=P1"), + /* 123 */ "ParseSchema" OpHelp(""), + /* 124 */ "LoadAnalysis" OpHelp(""), + /* 125 */ "DropTable" OpHelp(""), + /* 126 */ "DropIndex" OpHelp(""), + /* 127 */ "DropTrigger" OpHelp(""), + /* 128 */ "IntegrityCk" OpHelp(""), + /* 129 */ "RowSetAdd" OpHelp("rowset(P1)=r[P2]"), + /* 130 */ "RowSetRead" OpHelp("r[P3]=rowset(P1)"), + /* 131 */ "RowSetTest" OpHelp("if r[P3] in rowset(P1) goto P2"), + /* 132 */ "Program" OpHelp(""), /* 133 */ "Real" OpHelp("r[P2]=P4"), - /* 134 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), - /* 135 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), - /* 136 */ "MemMax" OpHelp("r[P1]=max(r[P1],r[P2])"), - /* 137 */ "IfPos" OpHelp("if r[P1]>0 goto P2"), - /* 138 */ "IfNeg" OpHelp("r[P1]+=P3, if r[P1]<0 goto P2"), - /* 139 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]+=P3, goto P2"), - /* 140 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), - /* 141 */ "JumpZeroIncr" OpHelp("if (r[P1]++)==0 ) goto P2"), - /* 142 */ "AggFinal" OpHelp("accum=r[P1] N=P2"), - /* 143 */ "IncrVacuum" OpHelp(""), - /* 144 */ "Expire" OpHelp(""), - /* 145 */ "TableLock" OpHelp("iDb=P1 root=P2 write=P3"), - /* 146 */ "VBegin" OpHelp(""), - /* 147 */ "VCreate" OpHelp(""), - /* 148 */ "VDestroy" OpHelp(""), - /* 149 */ "VOpen" OpHelp(""), - /* 150 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), - /* 151 */ "VNext" OpHelp(""), - /* 152 */ "VRename" OpHelp(""), - /* 153 */ "Pagecount" OpHelp(""), - /* 154 */ "MaxPgcnt" OpHelp(""), - /* 155 */ "Init" OpHelp("Start at P2"), - /* 156 */ "Noop" OpHelp(""), - /* 157 */ "Explain" OpHelp(""), + /* 134 */ "Param" OpHelp(""), + /* 135 */ "FkCounter" OpHelp("fkctr[P1]+=P2"), + /* 136 */ "FkIfZero" OpHelp("if fkctr[P1]==0 goto P2"), + /* 137 */ "MemMax" OpHelp("r[P1]=max(r[P1],r[P2])"), + /* 138 */ "IfPos" OpHelp("if r[P1]>0 goto P2"), + /* 139 */ "IfNeg" OpHelp("r[P1]+=P3, if r[P1]<0 goto P2"), + /* 140 */ "IfNotZero" OpHelp("if r[P1]!=0 then r[P1]+=P3, goto P2"), + /* 141 */ "DecrJumpZero" OpHelp("if (--r[P1])==0 goto P2"), + /* 142 */ "JumpZeroIncr" OpHelp("if (r[P1]++)==0 ) goto P2"), + /* 143 */ "AggStep0" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 144 */ "AggStep" OpHelp("accum=r[P3] step(r[P2@P5])"), + /* 145 */ "AggFinal" OpHelp("accum=r[P1] N=P2"), + /* 146 */ "IncrVacuum" OpHelp(""), + /* 147 */ "Expire" OpHelp(""), + /* 148 */ "TableLock" OpHelp("iDb=P1 root=P2 write=P3"), + /* 149 */ "VBegin" OpHelp(""), + /* 150 */ "VCreate" OpHelp(""), + /* 151 */ "VDestroy" OpHelp(""), + /* 152 */ "VOpen" OpHelp(""), + /* 153 */ "VColumn" OpHelp("r[P3]=vcolumn(P2)"), + /* 154 */ "VNext" OpHelp(""), + /* 155 */ "VRename" OpHelp(""), + /* 156 */ "Pagecount" OpHelp(""), + /* 157 */ "MaxPgcnt" OpHelp(""), + /* 158 */ "Init" OpHelp("Start at P2"), + /* 159 */ "Noop" OpHelp(""), + /* 160 */ "Explain" OpHelp(""), }; return azName[i]; } @@ -25290,6 +25804,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){ ** * Definitions of sqlite3_vfs objects for all locking methods ** plus implementations of sqlite3_os_init() and sqlite3_os_end(). */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_UNIX /* This file is used on unix only */ /* @@ -26073,14 +26588,14 @@ static int robust_open(const char *z, int f, mode_t m){ ** unixEnterLeave() */ static void unixEnterMutex(void){ - sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void unixLeaveMutex(void){ - sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifdef SQLITE_DEBUG static int unixMutexHeld(void) { - return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif @@ -33024,6 +33539,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void){ ** ** This file contains code that is specific to Windows. */ +/* #include "sqliteInt.h" */ #if SQLITE_OS_WIN /* This file is used for Windows only */ /* @@ -33232,6 +33748,7 @@ SQLITE_API int sqlite3_open_file_count = 0; /* ** Include the header file for the Windows VFS. */ +/* #include "os_win.h" */ /* ** Compiling and using WAL mode requires several APIs that are only @@ -36223,6 +36740,12 @@ static int winLock(sqlite3_file *id, int locktype){ return SQLITE_OK; } + /* Do not allow any kind of write-lock on a read-only database + */ + if( (pFile->ctrlFlags & WINFILE_RDONLY)!=0 && locktype>=RESERVED_LOCK ){ + return SQLITE_IOERR_LOCK; + } + /* Make sure the locking sequence is correct */ assert( pFile->locktype!=NO_LOCK || locktype==SHARED_LOCK ); @@ -36592,14 +37115,14 @@ static SYSTEM_INFO winSysInfo; ** winShmLeaveMutex() */ static void winShmEnterMutex(void){ - sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } static void winShmLeaveMutex(void){ - sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #ifndef NDEBUG static int winShmMutexHeld(void) { - return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER)); + return sqlite3_mutex_held(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_VFS1)); } #endif @@ -38619,14 +39142,14 @@ static int winRandomness(sqlite3_vfs *pVfs, int nBuf, char *zBuf){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreate(&id); - memcpy(zBuf, &id, sizeof(UUID)); + memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } if( sizeof(UUID)<=nBuf-n ){ UUID id; memset(&id, 0, sizeof(UUID)); osUuidCreateSequential(&id); - memcpy(zBuf, &id, sizeof(UUID)); + memcpy(&zBuf[n], &id, sizeof(UUID)); n += sizeof(UUID); } #endif @@ -38879,6 +39402,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_os_end(void){ ** start of a transaction, and is thus usually less than a few thousand, ** but can be as large as 2 billion for a really big database. */ +/* #include "sqliteInt.h" */ /* Size of the Bitvec structure in bytes. */ #define BITVEC_SZ 512 @@ -38970,10 +39494,10 @@ SQLITE_PRIVATE Bitvec *sqlite3BitvecCreate(u32 iSize){ ** If p is NULL (if the bitmap has not been created) or if ** i is out of range, then return false. */ -SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ - if( p==0 ) return 0; - if( i>p->iSize || i==0 ) return 0; +SQLITE_PRIVATE int sqlite3BitvecTestNotNull(Bitvec *p, u32 i){ + assert( p!=0 ); i--; + if( i>=p->iSize ) return 0; while( p->iDivisor ){ u32 bin = i/p->iDivisor; i = i%p->iDivisor; @@ -38993,6 +39517,9 @@ SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ return 0; } } +SQLITE_PRIVATE int sqlite3BitvecTest(Bitvec *p, u32 i){ + return p!=0 && sqlite3BitvecTestNotNull(p,i); +} /* ** Set the i-th bit. Return 0 on success and an error code if @@ -39265,6 +39792,7 @@ bitvec_end: ************************************************************************* ** This file implements that page cache. */ +/* #include "sqliteInt.h" */ /* ** A complete page cache is an instance of this structure. @@ -39281,7 +39809,6 @@ struct PCache { int (*xStress)(void*,PgHdr*); /* Call to try make a page clean */ void *pStress; /* Argument to xStress */ sqlite3_pcache *pCache; /* Pluggable cache module */ - PgHdr *pPage1; /* Reference to page 1 */ }; /********************************** Linked List Management ********************/ @@ -39359,9 +39886,6 @@ static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){ */ static void pcacheUnpin(PgHdr *p){ if( p->pCache->bPurgeable ){ - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 0); } } @@ -39454,7 +39978,6 @@ SQLITE_PRIVATE int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){ sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache); } pCache->pCache = pNew; - pCache->pPage1 = 0; pCache->szPage = szPage; } return SQLITE_OK; @@ -39579,13 +40102,14 @@ static SQLITE_NOINLINE PgHdr *pcacheFetchFinishWithInit( assert( pPage!=0 ); pPgHdr = (PgHdr*)pPage->pExtra; assert( pPgHdr->pPage==0 ); - memset(pPgHdr, 0, sizeof(PgHdr)); + memset(pPgHdr, 0, sizeof(PgHdr)); pPgHdr->pPage = pPage; pPgHdr->pData = pPage->pBuf; pPgHdr->pExtra = (void *)&pPgHdr[1]; memset(pPgHdr->pExtra, 0, pCache->szExtra); pPgHdr->pCache = pCache; pPgHdr->pgno = pgno; + pPgHdr->flags = PGHDR_CLEAN; return sqlite3PcacheFetchFinish(pCache,pgno,pPage); } @@ -39602,7 +40126,7 @@ SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( ){ PgHdr *pPgHdr; - if( pPage==0 ) return 0; + assert( pPage!=0 ); pPgHdr = (PgHdr *)pPage->pExtra; if( !pPgHdr->pPage ){ @@ -39612,9 +40136,6 @@ SQLITE_PRIVATE PgHdr *sqlite3PcacheFetchFinish( pCache->nRef++; } pPgHdr->nRef++; - if( pgno==1 ){ - pCache->pPage1 = pPgHdr; - } return pPgHdr; } @@ -39627,7 +40148,7 @@ SQLITE_PRIVATE void SQLITE_NOINLINE sqlite3PcacheRelease(PgHdr *p){ p->nRef--; if( p->nRef==0 ){ p->pCache->nRef--; - if( (p->flags&PGHDR_DIRTY)==0 ){ + if( p->flags&PGHDR_CLEAN ){ pcacheUnpin(p); }else if( p->pDirtyPrev!=0 ){ /* Move the page to the head of the dirty list. */ @@ -39655,9 +40176,6 @@ SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); } p->pCache->nRef--; - if( p->pgno==1 ){ - p->pCache->pPage1 = 0; - } sqlite3GlobalConfig.pcache2.xUnpin(p->pCache->pCache, p->pPage, 1); } @@ -39666,11 +40184,14 @@ SQLITE_PRIVATE void sqlite3PcacheDrop(PgHdr *p){ ** make it so. */ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ - p->flags &= ~PGHDR_DONT_WRITE; assert( p->nRef>0 ); - if( 0==(p->flags & PGHDR_DIRTY) ){ - p->flags |= PGHDR_DIRTY; - pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + if( p->flags & (PGHDR_CLEAN|PGHDR_DONT_WRITE) ){ + p->flags &= ~PGHDR_DONT_WRITE; + if( p->flags & PGHDR_CLEAN ){ + p->flags ^= (PGHDR_DIRTY|PGHDR_CLEAN); + assert( (p->flags & (PGHDR_DIRTY|PGHDR_CLEAN))==PGHDR_DIRTY ); + pcacheManageDirtyList(p, PCACHE_DIRTYLIST_ADD); + } } } @@ -39680,8 +40201,10 @@ SQLITE_PRIVATE void sqlite3PcacheMakeDirty(PgHdr *p){ */ SQLITE_PRIVATE void sqlite3PcacheMakeClean(PgHdr *p){ if( (p->flags & PGHDR_DIRTY) ){ + assert( (p->flags & PGHDR_CLEAN)==0 ); pcacheManageDirtyList(p, PCACHE_DIRTYLIST_REMOVE); - p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC); + p->flags &= ~(PGHDR_DIRTY|PGHDR_NEED_SYNC|PGHDR_WRITEABLE); + p->flags |= PGHDR_CLEAN; if( p->nRef==0 ){ pcacheUnpin(p); } @@ -39748,9 +40271,14 @@ SQLITE_PRIVATE void sqlite3PcacheTruncate(PCache *pCache, Pgno pgno){ sqlite3PcacheMakeClean(p); } } - if( pgno==0 && pCache->pPage1 ){ - memset(pCache->pPage1->pData, 0, pCache->szPage); - pgno = 1; + if( pgno==0 && pCache->nRef ){ + sqlite3_pcache_page *pPage1; + pPage1 = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache,1,0); + if( ALWAYS(pPage1) ){ /* Page 1 is always available in cache, because + ** pCache->nRef>0 */ + memset(pPage1->pBuf, 0, pCache->szPage); + pgno = 1; + } } sqlite3GlobalConfig.pcache2.xTruncate(pCache->pCache, pgno+1); } @@ -39941,8 +40469,72 @@ SQLITE_PRIVATE void sqlite3PcacheIterateDirty(PCache *pCache, void (*xIter)(PgHd ** of the SQLITE_CONFIG_PAGECACHE and sqlite3_release_memory() features. ** If the default page cache implementation is overridden, then neither of ** these two features are available. +** +** A Page cache line looks like this: +** +** ------------------------------------------------------------- +** | database page content | PgHdr1 | MemPage | PgHdr | +** ------------------------------------------------------------- +** +** The database page content is up front (so that buffer overreads tend to +** flow harmlessly into the PgHdr1, MemPage, and PgHdr extensions). MemPage +** is the extension added by the btree.c module containing information such +** as the database page number and how that database page is used. PgHdr +** is added by the pcache.c layer and contains information used to keep track +** of which pages are "dirty". PgHdr1 is an extension added by this +** module (pcache1.c). The PgHdr1 header is a subclass of sqlite3_pcache_page. +** PgHdr1 contains information needed to look up a page by its page number. +** The superclass sqlite3_pcache_page.pBuf points to the start of the +** database page content and sqlite3_pcache_page.pExtra points to PgHdr. +** +** The size of the extension (MemPage+PgHdr+PgHdr1) can be determined at +** runtime using sqlite3_config(SQLITE_CONFIG_PCACHE_HDRSZ, &size). The +** sizes of the extensions sum to 272 bytes on x64 for 3.8.10, but this +** size can vary according to architecture, compile-time options, and +** SQLite library version number. +** +** If SQLITE_PCACHE_SEPARATE_HEADER is defined, then the extension is obtained +** using a separate memory allocation from the database page content. This +** seeks to overcome the "clownshoe" problem (also called "internal +** fragmentation" in academic literature) of allocating a few bytes more +** than a power of two with the memory allocator rounding up to the next +** power of two, and leaving the rounded-up space unused. +** +** This module tracks pointers to PgHdr1 objects. Only pcache.c communicates +** with this module. Information is passed back and forth as PgHdr1 pointers. +** +** The pcache.c and pager.c modules deal pointers to PgHdr objects. +** The btree.c module deals with pointers to MemPage objects. +** +** SOURCE OF PAGE CACHE MEMORY: +** +** Memory for a page might come from any of three sources: +** +** (1) The general-purpose memory allocator - sqlite3Malloc() +** (2) Global page-cache memory provided using sqlite3_config() with +** SQLITE_CONFIG_PAGECACHE. +** (3) PCache-local bulk allocation. +** +** The third case is a chunk of heap memory (defaulting to 100 pages worth) +** that is allocated when the page cache is created. The size of the local +** bulk allocation can be adjusted using +** +** sqlite3_config(SQLITE_CONFIG_PCACHE, 0, 0, N). +** +** If N is positive, then N pages worth of memory are allocated using a single +** sqlite3Malloc() call and that memory is used for the first N pages allocated. +** Or if N is negative, then -1024*N bytes of memory are allocated and used +** for as many pages as can be accomodated. +** +** Only one of (2) or (3) can be used. Once the memory available to (2) or +** (3) is exhausted, subsequent allocations fail over to the general-purpose +** memory allocator (1). +** +** Earlier versions of SQLite used only methods (1) and (2). But experiments +** show that method (3) with N==100 provides about a 5% performance boost for +** common workloads. */ - +/* #include "sqliteInt.h" */ typedef struct PCache1 PCache1; typedef struct PgHdr1 PgHdr1; @@ -39995,8 +40587,9 @@ struct PCache1 { ** The PGroup mutex must be held when accessing nMax. */ PGroup *pGroup; /* PGroup this cache belongs to */ - int szPage; /* Size of allocated pages in bytes */ - int szExtra; /* Size of extra space in bytes */ + int szPage; /* Size of database content section */ + int szExtra; /* sizeof(MemPage)+sizeof(PgHdr) */ + int szAlloc; /* Total size of one pcache line */ int bPurgeable; /* True if cache is purgeable */ unsigned int nMin; /* Minimum number of pages reserved */ unsigned int nMax; /* Configured "cache_size" value */ @@ -40010,6 +40603,8 @@ struct PCache1 { unsigned int nPage; /* Total number of pages in apHash */ unsigned int nHash; /* Number of slots in apHash[] */ PgHdr1 **apHash; /* Hash table for fast lookup by key */ + PgHdr1 *pFree; /* List of unused pcache-local pages */ + void *pBulk; /* Bulk memory used by pcache-local */ }; /* @@ -40022,6 +40617,7 @@ struct PgHdr1 { sqlite3_pcache_page page; unsigned int iKey; /* Key value (page number) */ u8 isPinned; /* Page in use, not on the LRU list */ + u8 isBulkLocal; /* This page from bulk local storage */ PgHdr1 *pNext; /* Next in hash table chain */ PCache1 *pCache; /* Cache that currently owns this page */ PgHdr1 *pLruNext; /* Next in LRU list of unpinned pages */ @@ -40029,8 +40625,8 @@ struct PgHdr1 { }; /* -** Free slots in the allocator used to divide up the buffer provided using -** the SQLITE_CONFIG_PAGECACHE mechanism. +** Free slots in the allocator used to divide up the global page cache +** buffer provided using the SQLITE_CONFIG_PAGECACHE mechanism. */ struct PgFreeslot { PgFreeslot *pNext; /* Next free slot */ @@ -40048,10 +40644,12 @@ static SQLITE_WSD struct PCacheGlobal { ** The nFreeSlot and pFree values do require mutex protection. */ int isInit; /* True if initialized */ + int separateCache; /* Use a new PGroup for each PCache */ + int nInitPage; /* Initial bulk allocation size */ int szSlot; /* Size of each free slot */ int nSlot; /* The number of pcache slots */ int nReserve; /* Try to keep nFreeSlot above this */ - void *pStart, *pEnd; /* Bounds of pagecache malloc range */ + void *pStart, *pEnd; /* Bounds of global page cache memory */ /* Above requires no mutex. Use mutex below for variable that follow. */ sqlite3_mutex *mutex; /* Mutex for accessing the following: */ PgFreeslot *pFree; /* Free page blocks */ @@ -40073,8 +40671,15 @@ static SQLITE_WSD struct PCacheGlobal { /* ** Macros to enter and leave the PCache LRU mutex. */ -#define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) -#define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +#if !defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 +# define pcache1EnterMutex(X) assert((X)->mutex==0) +# define pcache1LeaveMutex(X) assert((X)->mutex==0) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 0 +#else +# define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) +# define pcache1LeaveMutex(X) sqlite3_mutex_leave((X)->mutex) +# define PCACHE1_MIGHT_USE_GROUP_MUTEX 1 +#endif /******************************************************************************/ /******** Page Allocation/SQLITE_CONFIG_PCACHE Related Functions **************/ @@ -40091,6 +40696,7 @@ static SQLITE_WSD struct PCacheGlobal { SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ if( pcache1.isInit ){ PgFreeslot *p; + if( pBuf==0 ) sz = n = 0; sz = ROUNDDOWN8(sz); pcache1.szSlot = sz; pcache1.nSlot = pcache1.nFreeSlot = n; @@ -40108,6 +40714,43 @@ SQLITE_PRIVATE void sqlite3PCacheBufferSetup(void *pBuf, int sz, int n){ } } +/* +** Try to initialize the pCache->pFree and pCache->pBulk fields. Return +** true if pCache->pFree ends up containing one or more free pages. +*/ +static int pcache1InitBulk(PCache1 *pCache){ + i64 szBulk; + char *zBulk; + if( pcache1.nInitPage==0 ) return 0; + /* Do not bother with a bulk allocation if the cache size very small */ + if( pCache->nMax<3 ) return 0; + sqlite3BeginBenignMalloc(); + if( pcache1.nInitPage>0 ){ + szBulk = pCache->szAlloc * (i64)pcache1.nInitPage; + }else{ + szBulk = -1024 * (i64)pcache1.nInitPage; + } + if( szBulk > pCache->szAlloc*(i64)pCache->nMax ){ + szBulk = pCache->szAlloc*pCache->nMax; + } + zBulk = pCache->pBulk = sqlite3Malloc( szBulk ); + sqlite3EndBenignMalloc(); + if( zBulk ){ + int nBulk = sqlite3MallocSize(zBulk)/pCache->szAlloc; + int i; + for(i=0; i<nBulk; i++){ + PgHdr1 *pX = (PgHdr1*)&zBulk[pCache->szPage]; + pX->page.pBuf = zBulk; + pX->page.pExtra = &pX[1]; + pX->isBulkLocal = 1; + pX->pNext = pCache->pFree; + pCache->pFree = pX; + zBulk += pCache->szAlloc; + } + } + return pCache->pFree!=0; +} + /* ** Malloc function used within this file to allocate space from the buffer ** configured using sqlite3_config(SQLITE_CONFIG_PAGECACHE) option. If no @@ -40155,9 +40798,9 @@ static void *pcache1Alloc(int nByte){ /* ** Free an allocated buffer obtained from pcache1Alloc(). */ -static int pcache1Free(void *p){ +static void pcache1Free(void *p){ int nFreed = 0; - if( p==0 ) return 0; + if( p==0 ) return; if( p>=pcache1.pStart && p<pcache1.pEnd ){ PgFreeslot *pSlot; sqlite3_mutex_enter(pcache1.mutex); @@ -40172,15 +40815,14 @@ static int pcache1Free(void *p){ }else{ assert( sqlite3MemdebugHasType(p, MEMTYPE_PCACHE) ); sqlite3MemdebugSetType(p, MEMTYPE_HEAP); - nFreed = sqlite3MallocSize(p); #ifndef SQLITE_DISABLE_PAGECACHE_OVERFLOW_STATS + nFreed = sqlite3MallocSize(p); sqlite3_mutex_enter(pcache1.mutex); sqlite3StatusDown(SQLITE_STATUS_PAGECACHE_OVERFLOW, nFreed); sqlite3_mutex_leave(pcache1.mutex); #endif sqlite3_free(p); } - return nFreed; } #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT @@ -40208,54 +40850,65 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ PgHdr1 *p = 0; void *pPg; - /* The group mutex must be released before pcache1Alloc() is called. This - ** is because it may call sqlite3_release_memory(), which assumes that - ** this mutex is not held. */ assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); - pcache1LeaveMutex(pCache->pGroup); + if( pCache->pFree || (pCache->nPage==0 && pcache1InitBulk(pCache)) ){ + p = pCache->pFree; + pCache->pFree = p->pNext; + p->pNext = 0; + }else{ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it might call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( pcache1.separateCache==0 ); + assert( pCache->pGroup==&pcache1.grp ); + pcache1LeaveMutex(pCache->pGroup); +#endif #ifdef SQLITE_PCACHE_SEPARATE_HEADER - pPg = pcache1Alloc(pCache->szPage); - p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); - if( !pPg || !p ){ - pcache1Free(pPg); - sqlite3_free(p); - pPg = 0; - } + pPg = pcache1Alloc(pCache->szPage); + p = sqlite3Malloc(sizeof(PgHdr1) + pCache->szExtra); + if( !pPg || !p ){ + pcache1Free(pPg); + sqlite3_free(p); + pPg = 0; + } #else - pPg = pcache1Alloc(ROUND8(sizeof(PgHdr1)) + pCache->szPage + pCache->szExtra); - p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; + pPg = pcache1Alloc(pCache->szAlloc); + p = (PgHdr1 *)&((u8 *)pPg)[pCache->szPage]; #endif - pcache1EnterMutex(pCache->pGroup); - - if( pPg ){ +#ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg==0 ) return 0; p->page.pBuf = pPg; p->page.pExtra = &p[1]; - if( pCache->bPurgeable ){ - pCache->pGroup->nCurrentPage++; - } - return p; + p->isBulkLocal = 0; } - return 0; + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage++; + } + return p; } /* ** Free a page object allocated by pcache1AllocPage(). -** -** The pointer is allowed to be NULL, which is prudent. But it turns out -** that the current implementation happens to never call this routine -** with a NULL pointer, so we mark the NULL test with ALWAYS(). */ static void pcache1FreePage(PgHdr1 *p){ - if( ALWAYS(p) ){ - PCache1 *pCache = p->pCache; - assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + PCache1 *pCache; + assert( p!=0 ); + pCache = p->pCache; + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + if( p->isBulkLocal ){ + p->pNext = pCache->pFree; + pCache->pFree = p; + }else{ pcache1Free(p->page.pBuf); #ifdef SQLITE_PCACHE_SEPARATE_HEADER sqlite3_free(p); #endif - if( pCache->bPurgeable ){ - pCache->pGroup->nCurrentPage--; - } + } + if( pCache->bPurgeable ){ + pCache->pGroup->nCurrentPage--; } } @@ -40350,41 +41003,41 @@ static void pcache1ResizeHash(PCache1 *p){ ** ** The PGroup mutex must be held when this function is called. */ -static void pcache1PinPage(PgHdr1 *pPage){ +static PgHdr1 *pcache1PinPage(PgHdr1 *pPage){ PCache1 *pCache; - PGroup *pGroup; assert( pPage!=0 ); assert( pPage->isPinned==0 ); pCache = pPage->pCache; - pGroup = pCache->pGroup; - assert( pPage->pLruNext || pPage==pGroup->pLruTail ); - assert( pPage->pLruPrev || pPage==pGroup->pLruHead ); - assert( sqlite3_mutex_held(pGroup->mutex) ); + assert( pPage->pLruNext || pPage==pCache->pGroup->pLruTail ); + assert( pPage->pLruPrev || pPage==pCache->pGroup->pLruHead ); + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); if( pPage->pLruPrev ){ pPage->pLruPrev->pLruNext = pPage->pLruNext; }else{ - pGroup->pLruHead = pPage->pLruNext; + pCache->pGroup->pLruHead = pPage->pLruNext; } if( pPage->pLruNext ){ pPage->pLruNext->pLruPrev = pPage->pLruPrev; }else{ - pGroup->pLruTail = pPage->pLruPrev; + pCache->pGroup->pLruTail = pPage->pLruPrev; } pPage->pLruNext = 0; pPage->pLruPrev = 0; pPage->isPinned = 1; pCache->nRecyclable--; + return pPage; } /* ** Remove the page supplied as an argument from the hash table ** (PCache1.apHash structure) that it is currently stored in. +** Also free the page if freePage is true. ** ** The PGroup mutex must be held when this function is called. */ -static void pcache1RemoveFromHash(PgHdr1 *pPage){ +static void pcache1RemoveFromHash(PgHdr1 *pPage, int freeFlag){ unsigned int h; PCache1 *pCache = pPage->pCache; PgHdr1 **pp; @@ -40395,21 +41048,26 @@ static void pcache1RemoveFromHash(PgHdr1 *pPage){ *pp = (*pp)->pNext; pCache->nPage--; + if( freeFlag ) pcache1FreePage(pPage); } /* ** If there are currently more than nMaxPage pages allocated, try ** to recycle pages to reduce the number allocated to nMaxPage. */ -static void pcache1EnforceMaxPage(PGroup *pGroup){ +static void pcache1EnforceMaxPage(PCache1 *pCache){ + PGroup *pGroup = pCache->pGroup; assert( sqlite3_mutex_held(pGroup->mutex) ); while( pGroup->nCurrentPage>pGroup->nMaxPage && pGroup->pLruTail ){ PgHdr1 *p = pGroup->pLruTail; assert( p->pCache->pGroup==pGroup ); assert( p->isPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); + } + if( pCache->nPage==0 && pCache->pBulk ){ + sqlite3_free(pCache->pBulk); + pCache->pBulk = pCache->pFree = 0; } } @@ -40455,10 +41113,45 @@ static int pcache1Init(void *NotUsed){ UNUSED_PARAMETER(NotUsed); assert( pcache1.isInit==0 ); memset(&pcache1, 0, sizeof(pcache1)); + + + /* + ** The pcache1.separateCache variable is true if each PCache has its own + ** private PGroup (mode-1). pcache1.separateCache is false if the single + ** PGroup in pcache1.grp is used for all page caches (mode-2). + ** + ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT + ** + ** * Use a unified cache in single-threaded applications that have + ** configured a start-time buffer for use as page-cache memory using + ** sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with non-NULL + ** pBuf argument. + ** + ** * Otherwise use separate caches (mode-1) + */ +#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) + pcache1.separateCache = 0; +#elif SQLITE_THREADSAFE + pcache1.separateCache = sqlite3GlobalConfig.pPage==0 + || sqlite3GlobalConfig.bCoreMutex>0; +#else + pcache1.separateCache = sqlite3GlobalConfig.pPage==0; +#endif + +#if SQLITE_THREADSAFE if( sqlite3GlobalConfig.bCoreMutex ){ pcache1.grp.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU); pcache1.mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_PMEM); } +#endif + if( pcache1.separateCache + && sqlite3GlobalConfig.nPage!=0 + && sqlite3GlobalConfig.pPage==0 + ){ + pcache1.nInitPage = sqlite3GlobalConfig.nPage; + }else{ + pcache1.nInitPage = 0; + } pcache1.grp.mxPinned = 10; pcache1.isInit = 1; return SQLITE_OK; @@ -40488,31 +41181,13 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ PGroup *pGroup; /* The group the new page cache will belong to */ int sz; /* Bytes of memory required to allocate the new cache */ - /* - ** The separateCache variable is true if each PCache has its own private - ** PGroup. In other words, separateCache is true for mode (1) where no - ** mutexing is required. - ** - ** * Always use a unified cache (mode-2) if ENABLE_MEMORY_MANAGEMENT - ** - ** * Always use a unified cache in single-threaded applications - ** - ** * Otherwise (if multi-threaded and ENABLE_MEMORY_MANAGEMENT is off) - ** use separate caches (mode-1) - */ -#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT) || SQLITE_THREADSAFE==0 - const int separateCache = 0; -#else - int separateCache = sqlite3GlobalConfig.bCoreMutex>0; -#endif - assert( (szPage & (szPage-1))==0 && szPage>=512 && szPage<=65536 ); assert( szExtra < 300 ); - sz = sizeof(PCache1) + sizeof(PGroup)*separateCache; + sz = sizeof(PCache1) + sizeof(PGroup)*pcache1.separateCache; pCache = (PCache1 *)sqlite3MallocZero(sz); if( pCache ){ - if( separateCache ){ + if( pcache1.separateCache ){ pGroup = (PGroup*)&pCache[1]; pGroup->mxPinned = 10; }else{ @@ -40521,6 +41196,7 @@ static sqlite3_pcache *pcache1Create(int szPage, int szExtra, int bPurgeable){ pCache->pGroup = pGroup; pCache->szPage = szPage; pCache->szExtra = szExtra; + pCache->szAlloc = szPage + szExtra + ROUND8(sizeof(PgHdr1)); pCache->bPurgeable = (bPurgeable ? 1 : 0); pcache1EnterMutex(pGroup); pcache1ResizeHash(pCache); @@ -40552,7 +41228,7 @@ static void pcache1Cachesize(sqlite3_pcache *p, int nMax){ pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; pCache->nMax = nMax; pCache->n90pct = pCache->nMax*9/10; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); } } @@ -40570,7 +41246,7 @@ static void pcache1Shrink(sqlite3_pcache *p){ pcache1EnterMutex(pGroup); savedMaxPage = pGroup->nMaxPage; pGroup->nMaxPage = 0; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pGroup->nMaxPage = savedMaxPage; pcache1LeaveMutex(pGroup); } @@ -40623,26 +41299,17 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( assert( pCache->nHash>0 && pCache->apHash ); /* Step 4. Try to recycle a page. */ - if( pCache->bPurgeable && pGroup->pLruTail && ( - (pCache->nPage+1>=pCache->nMax) - || pGroup->nCurrentPage>=pGroup->nMaxPage - || pcache1UnderMemoryPressure(pCache) - )){ + if( pCache->bPurgeable + && pGroup->pLruTail + && ((pCache->nPage+1>=pCache->nMax) || pcache1UnderMemoryPressure(pCache)) + ){ PCache1 *pOther; pPage = pGroup->pLruTail; assert( pPage->isPinned==0 ); - pcache1RemoveFromHash(pPage); + pcache1RemoveFromHash(pPage, 0); pcache1PinPage(pPage); pOther = pPage->pCache; - - /* We want to verify that szPage and szExtra are the same for pOther - ** and pCache. Assert that we can verify this by comparing sums. */ - assert( (pCache->szPage & (pCache->szPage-1))==0 && pCache->szPage>=512 ); - assert( pCache->szExtra<512 ); - assert( (pOther->szPage & (pOther->szPage-1))==0 && pOther->szPage>=512 ); - assert( pOther->szExtra<512 ); - - if( pOther->szPage+pOther->szExtra != pCache->szPage+pCache->szExtra ){ + if( pOther->szAlloc != pCache->szAlloc ){ pcache1FreePage(pPage); pPage = 0; }else{ @@ -40654,9 +41321,9 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( ** attempt to allocate a new one. */ if( !pPage ){ - if( createFlag==1 ) sqlite3BeginBenignMalloc(); + if( createFlag==1 ){ sqlite3BeginBenignMalloc(); } pPage = pcache1AllocPage(pCache); - if( createFlag==1 ) sqlite3EndBenignMalloc(); + if( createFlag==1 ){ sqlite3EndBenignMalloc(); } } if( pPage ){ @@ -40730,8 +41397,13 @@ static SQLITE_NOINLINE PgHdr1 *pcache1FetchStage2( ** proceed to step 5. ** ** 5. Otherwise, allocate and return a new page buffer. +** +** There are two versions of this routine. pcache1FetchWithMutex() is +** the general case. pcache1FetchNoMutex() is a faster implementation for +** the common case where pGroup->mutex is NULL. The pcache1Fetch() wrapper +** invokes the appropriate routine. */ -static sqlite3_pcache_page *pcache1Fetch( +static PgHdr1 *pcache1FetchNoMutex( sqlite3_pcache *p, unsigned int iKey, int createFlag @@ -40739,28 +41411,63 @@ static sqlite3_pcache_page *pcache1Fetch( PCache1 *pCache = (PCache1 *)p; PgHdr1 *pPage = 0; - assert( offsetof(PgHdr1,page)==0 ); - assert( pCache->bPurgeable || createFlag!=1 ); - assert( pCache->bPurgeable || pCache->nMin==0 ); - assert( pCache->bPurgeable==0 || pCache->nMin==10 ); - assert( pCache->nMin==0 || pCache->bPurgeable ); - assert( pCache->nHash>0 ); - pcache1EnterMutex(pCache->pGroup); - /* Step 1: Search the hash table for an existing entry. */ pPage = pCache->apHash[iKey % pCache->nHash]; while( pPage && pPage->iKey!=iKey ){ pPage = pPage->pNext; } /* Step 2: Abort if no existing page is found and createFlag is 0 */ if( pPage ){ - if( !pPage->isPinned ) pcache1PinPage(pPage); + if( !pPage->isPinned ){ + return pcache1PinPage(pPage); + }else{ + return pPage; + } }else if( createFlag ){ /* Steps 3, 4, and 5 implemented by this subroutine */ - pPage = pcache1FetchStage2(pCache, iKey, createFlag); + return pcache1FetchStage2(pCache, iKey, createFlag); + }else{ + return 0; } +} +#if PCACHE1_MIGHT_USE_GROUP_MUTEX +static PgHdr1 *pcache1FetchWithMutex( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ + PCache1 *pCache = (PCache1 *)p; + PgHdr1 *pPage; + + pcache1EnterMutex(pCache->pGroup); + pPage = pcache1FetchNoMutex(p, iKey, createFlag); assert( pPage==0 || pCache->iMaxKey>=iKey ); pcache1LeaveMutex(pCache->pGroup); - return (sqlite3_pcache_page*)pPage; + return pPage; +} +#endif +static sqlite3_pcache_page *pcache1Fetch( + sqlite3_pcache *p, + unsigned int iKey, + int createFlag +){ +#if PCACHE1_MIGHT_USE_GROUP_MUTEX || defined(SQLITE_DEBUG) + PCache1 *pCache = (PCache1 *)p; +#endif + + assert( offsetof(PgHdr1,page)==0 ); + assert( pCache->bPurgeable || createFlag!=1 ); + assert( pCache->bPurgeable || pCache->nMin==0 ); + assert( pCache->bPurgeable==0 || pCache->nMin==10 ); + assert( pCache->nMin==0 || pCache->bPurgeable ); + assert( pCache->nHash>0 ); +#if PCACHE1_MIGHT_USE_GROUP_MUTEX + if( pCache->pGroup->mutex ){ + return (sqlite3_pcache_page*)pcache1FetchWithMutex(p, iKey, createFlag); + }else +#endif + { + return (sqlite3_pcache_page*)pcache1FetchNoMutex(p, iKey, createFlag); + } } @@ -40789,8 +41496,7 @@ static void pcache1Unpin( assert( pPage->isPinned==1 ); if( reuseUnlikely || pGroup->nCurrentPage>pGroup->nMaxPage ){ - pcache1RemoveFromHash(pPage); - pcache1FreePage(pPage); + pcache1RemoveFromHash(pPage, 1); }else{ /* Add the page to the PGroup LRU list. */ if( pGroup->pLruHead ){ @@ -40877,8 +41583,9 @@ static void pcache1Destroy(sqlite3_pcache *p){ assert( pGroup->nMinPage >= pCache->nMin ); pGroup->nMinPage -= pCache->nMin; pGroup->mxPinned = pGroup->nMaxPage + 10 - pGroup->nMinPage; - pcache1EnforceMaxPage(pGroup); + pcache1EnforceMaxPage(pCache); pcache1LeaveMutex(pGroup); + sqlite3_free(pCache->pBulk); sqlite3_free(pCache->apHash); sqlite3_free(pCache); } @@ -40934,7 +41641,7 @@ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ int nFree = 0; assert( sqlite3_mutex_notheld(pcache1.grp.mutex) ); assert( sqlite3_mutex_notheld(pcache1.mutex) ); - if( pcache1.pStart==0 ){ + if( sqlite3GlobalConfig.nPage==0 ){ PgHdr1 *p; pcache1EnterMutex(&pcache1.grp); while( (nReq<0 || nFree<nReq) && ((p=pcache1.grp.pLruTail)!=0) ){ @@ -40944,8 +41651,7 @@ SQLITE_PRIVATE int sqlite3PcacheReleaseMemory(int nReq){ #endif assert( p->isPinned==0 ); pcache1PinPage(p); - pcache1RemoveFromHash(p); - pcache1FreePage(p); + pcache1RemoveFromHash(p, 1); } pcache1LeaveMutex(&pcache1.grp); } @@ -41041,6 +41747,7 @@ SQLITE_PRIVATE void sqlite3PcacheStats( ** There is an added cost of O(N) when switching between TEST and ** SMALLEST primitives. */ +/* #include "sqliteInt.h" */ /* @@ -41510,6 +42217,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 ** another is writing. */ #ifndef SQLITE_OMIT_DISKIO +/* #include "sqliteInt.h" */ /************** Include wal.h in the middle of pager.c ***********************/ /************** Begin file wal.h *********************************************/ /* @@ -41531,6 +42239,7 @@ SQLITE_PRIVATE int sqlite3RowSetTest(RowSet *pRowSet, int iBatch, sqlite3_int64 #ifndef _WAL_H_ #define _WAL_H_ +/* #include "sqliteInt.h" */ /* Additional values that can be added to the sync_flags argument of ** sqlite3WalFrames(): @@ -42086,9 +42795,9 @@ struct PagerSavepoint { /* ** Bits of the Pager.doNotSpill flag. See further description below. */ -#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ -#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ -#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ +#define SPILLFLAG_OFF 0x01 /* Never spill cache. Set via pragma */ +#define SPILLFLAG_ROLLBACK 0x02 /* Current rolling back, so do not spill */ +#define SPILLFLAG_NOSYNC 0x04 /* Spill is ok, but do not sync */ /* ** An open page cache is an instance of struct Pager. A description of @@ -42170,11 +42879,11 @@ struct PagerSavepoint { ** while it is being traversed by code in pager_playback(). The SPILLFLAG_OFF ** case is a user preference. ** -** If the SPILLFLAG_NOSYNC bit is set, writing to the database from pagerStress() -** is permitted, but syncing the journal file is not. This flag is set -** by sqlite3PagerWrite() when the file-system sector-size is larger than -** the database page-size in order to prevent a journal sync from happening -** in between the journalling of two pages on the same sector. +** If the SPILLFLAG_NOSYNC bit is set, writing to the database from +** pagerStress() is permitted, but syncing the journal file is not. +** This flag is set by sqlite3PagerWrite() when the file-system sector-size +** is larger than the database page-size in order to prevent a journal sync +** from happening in between the journalling of two pages on the same sector. ** ** subjInMemory ** @@ -42277,7 +42986,7 @@ struct Pager { u8 doNotSpill; /* Do not spill the cache when non-zero */ u8 subjInMemory; /* True to use in-memory sub-journals */ u8 bUseFetch; /* True to use xFetch() */ - u8 hasBeenUsed; /* True if any content previously read from this pager*/ + u8 hasBeenUsed; /* True if any content previously read */ Pgno dbSize; /* Number of pages in the database */ Pgno dbOrigSize; /* dbSize before the current transaction */ Pgno dbFileSize; /* Number of pages in the database file */ @@ -42438,7 +43147,7 @@ static const unsigned char aJournalMagic[] = { ** ** if( pPager->jfd->pMethods ){ ... */ -#define isOpen(pFd) ((pFd)->pMethods) +#define isOpen(pFd) ((pFd)->pMethods!=0) /* ** Return true if this pager uses a write-ahead log instead of the usual @@ -42661,19 +43370,21 @@ static int subjRequiresPage(PgHdr *pPg){ int i; for(i=0; i<pPager->nSavepoint; i++){ p = &pPager->aSavepoint[i]; - if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){ + if( p->nOrig>=pgno && 0==sqlite3BitvecTestNotNull(p->pInSavepoint, pgno) ){ return 1; } } return 0; } +#ifdef SQLITE_DEBUG /* ** Return true if the page is already in the journal file. */ static int pageInJournal(Pager *pPager, PgHdr *pPg){ return sqlite3BitvecTest(pPager->pInJournal, pPg->pgno); } +#endif /* ** Read a 32-bit integer from the given file descriptor. Store the integer @@ -43285,7 +43996,8 @@ static int writeMasterJournal(Pager *pPager, const char *zMaster){ || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster))) || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum))) - || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8))) + || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, + iHdrOff+4+nMaster+8))) ){ return rc; } @@ -43845,7 +44557,7 @@ static int pager_playback_one_page( } } - /* If this page has already been played by before during the current + /* If this page has already been played back before during the current ** rollback, then don't bother to play it back again. */ if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){ @@ -44779,11 +45491,10 @@ static int pagerPagecount(Pager *pPager, Pgno *pnPage){ assert( pPager->eLock>=SHARED_LOCK ); nPage = sqlite3WalDbsize(pPager->pWal); - /* If the database size was not available from the WAL sub-system, - ** determine it based on the size of the database file. If the size - ** of the database file is not an integer multiple of the page-size, - ** round down to the nearest page. Except, any file larger than 0 - ** bytes in size is considered to contain at least one page. + /* If the number of pages in the database is not available from the + ** WAL sub-system, determine the page counte based on the size of + ** the database file. If the size of the database file is not an + ** integer multiple of the page-size, round up the result. */ if( nPage==0 ){ i64 n = 0; /* Size of db file in bytes */ @@ -45948,8 +46659,6 @@ static int openSubJournal(Pager *pPager){ /* ** Append a record of the current state of page pPg to the sub-journal. -** It is the callers responsibility to use subjRequiresPage() to check -** that it is really required before calling this function. ** ** If successful, set the bit corresponding to pPg->pgno in the bitvecs ** for all open savepoints before returning. @@ -45996,6 +46705,13 @@ static int subjournalPage(PgHdr *pPg){ } return rc; } +static int subjournalPageIfRequired(PgHdr *pPg){ + if( subjRequiresPage(pPg) ){ + return subjournalPage(pPg); + }else{ + return SQLITE_OK; + } +} /* ** This function is called by the pcache layer when it has reached some @@ -46053,9 +46769,7 @@ static int pagerStress(void *p, PgHdr *pPg){ pPg->pDirty = 0; if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. */ - if( subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); - } + rc = subjournalPageIfRequired(pPg); if( rc==SQLITE_OK ){ rc = pagerWalFrames(pPager, pPg, 0, 0); } @@ -46068,39 +46782,6 @@ static int pagerStress(void *p, PgHdr *pPg){ rc = syncJournal(pPager, 1); } - /* If the page number of this page is larger than the current size of - ** the database image, it may need to be written to the sub-journal. - ** This is because the call to pager_write_pagelist() below will not - ** actually write data to the file in this case. - ** - ** Consider the following sequence of events: - ** - ** BEGIN; - ** <journal page X> - ** <modify page X> - ** SAVEPOINT sp; - ** <shrink database file to Y pages> - ** pagerStress(page X) - ** ROLLBACK TO sp; - ** - ** If (X>Y), then when pagerStress is called page X will not be written - ** out to the database file, but will be dropped from the cache. Then, - ** following the "ROLLBACK TO sp" statement, reading page X will read - ** data from the database file. This will be the copy of page X as it - ** was when the transaction started, not as it was when "SAVEPOINT sp" - ** was executed. - ** - ** The solution is to write the current data for page X into the - ** sub-journal file now (if it is not already there), so that it will - ** be restored to its current value when the "ROLLBACK TO sp" is - ** executed. - */ - if( NEVER( - rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg) - ) ){ - rc = subjournalPage(pPg); - } - /* Write the contents of the page out to the database file. */ if( rc==SQLITE_OK ){ assert( (pPg->flags&PGHDR_NEED_SYNC)==0 ); @@ -46356,7 +47037,7 @@ SQLITE_PRIVATE int sqlite3PagerOpen( act_like_temp_file: tempFile = 1; pPager->eState = PAGER_READER; /* Pretend we already have a lock */ - pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE locking mode */ + pPager->eLock = EXCLUSIVE_LOCK; /* Pretend we are in EXCLUSIVE mode */ pPager->noLock = 1; /* Do no locking */ readOnly = (vfsFlags&SQLITE_OPEN_READONLY); } @@ -46375,7 +47056,7 @@ act_like_temp_file: assert( nExtra<1000 ); nExtra = ROUND8(nExtra); rc = sqlite3PcacheOpen(szPageDflt, nExtra, !memDb, - !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); + !memDb?pagerStress:0, (void *)pPager, pPager->pPCache); } /* If an error occurred above, free the Pager structure and close the file. @@ -46762,7 +47443,7 @@ SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){ ** occurring on the very first access to a file, in order to save a ** single unnecessary sqlite3OsRead() call at the start-up. ** - ** Database changes is detected by looking at 15 bytes beginning + ** Database changes are detected by looking at 15 bytes beginning ** at offset 24 into the file. The first 4 of these 16 bytes are ** a 32-bit counter that is incremented with each change. The ** other bytes change randomly with each file change when @@ -46970,9 +47651,14 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( if( pBase==0 ){ rc = sqlite3PcacheFetchStress(pPager->pPCache, pgno, &pBase); if( rc!=SQLITE_OK ) goto pager_acquire_err; + if( pBase==0 ){ + pPg = *ppPage = 0; + rc = SQLITE_NOMEM; + goto pager_acquire_err; + } } pPg = *ppPage = sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pBase); - if( pPg==0 ) rc = SQLITE_NOMEM; + assert( pPg!=0 ); } } @@ -46983,10 +47669,11 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( pPg = 0; goto pager_acquire_err; } - assert( (*ppPage)->pgno==pgno ); - assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 ); + assert( pPg==(*ppPage) ); + assert( pPg->pgno==pgno ); + assert( pPg->pPager==pPager || pPg->pPager==0 ); - if( (*ppPage)->pPager && !noContent ){ + if( pPg->pPager && !noContent ){ /* In this case the pcache already contains an initialized copy of ** the page. Return without further ado. */ assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) ); @@ -46997,7 +47684,6 @@ SQLITE_PRIVATE int sqlite3PagerAcquire( /* The pager cache has created a new page. Its content needs to ** be initialized. */ - pPg = *ppPage; pPg->pPager = pPager; /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page @@ -47076,6 +47762,7 @@ SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){ assert( pPager->pPCache!=0 ); pPage = sqlite3PcacheFetch(pPager->pPCache, pgno, 0); assert( pPage==0 || pPager->hasBeenUsed ); + if( pPage==0 ) return 0; return sqlite3PcacheFetchFinish(pPager->pPCache, pgno, pPage); } @@ -47279,6 +47966,59 @@ SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory return rc; } +/* +** Write page pPg onto the end of the rollback journal. +*/ +static SQLITE_NOINLINE int pagerAddPageToRollbackJournal(PgHdr *pPg){ + Pager *pPager = pPg->pPager; + int rc; + u32 cksum; + char *pData2; + i64 iOff = pPager->journalOff; + + /* We should never write to the journal file the page that + ** contains the database locks. The following assert verifies + ** that we do not. */ + assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); + + assert( pPager->journalHdr<=pPager->journalOff ); + CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); + cksum = pager_cksum(pPager, (u8*)pData2); + + /* Even if an IO or diskfull error occurs while journalling the + ** page in the block above, set the need-sync flag for the page. + ** Otherwise, when the transaction is rolled back, the logic in + ** playback_one_page() will think that the page needs to be restored + ** in the database file. And if an IO error occurs while doing so, + ** then corruption may follow. + */ + pPg->flags |= PGHDR_NEED_SYNC; + + rc = write32bits(pPager->jfd, iOff, pPg->pgno); + if( rc!=SQLITE_OK ) return rc; + rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); + if( rc!=SQLITE_OK ) return rc; + rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); + if( rc!=SQLITE_OK ) return rc; + + IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, + pPager->journalOff, pPager->pageSize)); + PAGER_INCR(sqlite3_pager_writej_count); + PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); + + pPager->journalOff += 8 + pPager->pageSize; + pPager->nRec++; + assert( pPager->pInJournal!=0 ); + rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); + testcase( rc==SQLITE_NOMEM ); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + rc |= addToSavepointBitvecs(pPager, pPg->pgno); + assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); + return rc; +} + /* ** Mark a single data page as writeable. The page is written into the ** main journal or sub-journal as required. If the page is written into @@ -47289,7 +48029,6 @@ SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory static int pager_write(PgHdr *pPg){ Pager *pPager = pPg->pPager; int rc = SQLITE_OK; - int inJournal; /* This routine is not called unless a write-transaction has already ** been started. The journal file may or may not be open at this point. @@ -47302,7 +48041,6 @@ static int pager_write(PgHdr *pPg){ assert( assert_pager_state(pPager) ); assert( pPager->errCode==0 ); assert( pPager->readOnly==0 ); - CHECK_PAGE(pPg); /* The journal file needs to be opened. Higher level routines have already @@ -47321,91 +48059,48 @@ static int pager_write(PgHdr *pPg){ assert( pPager->eState>=PAGER_WRITER_CACHEMOD ); assert( assert_pager_state(pPager) ); - /* Mark the page as dirty. If the page has already been written - ** to the journal then we can return right away. - */ + /* Mark the page that is about to be modified as dirty. */ sqlite3PcacheMakeDirty(pPg); - inJournal = pageInJournal(pPager, pPg); - if( inJournal && (pPager->nSavepoint==0 || !subjRequiresPage(pPg)) ){ - assert( !pagerUseWal(pPager) ); - }else{ - - /* The transaction journal now exists and we have a RESERVED or an - ** EXCLUSIVE lock on the main database file. Write the current page to - ** the transaction journal if it is not there already. - */ - if( !inJournal && !pagerUseWal(pPager) ){ - assert( pagerUseWal(pPager)==0 ); - if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){ - u32 cksum; - char *pData2; - i64 iOff = pPager->journalOff; - - /* We should never write to the journal file the page that - ** contains the database locks. The following assert verifies - ** that we do not. */ - assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) ); - - assert( pPager->journalHdr<=pPager->journalOff ); - CODEC2(pPager, pPg->pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); - cksum = pager_cksum(pPager, (u8*)pData2); - - /* Even if an IO or diskfull error occurs while journalling the - ** page in the block above, set the need-sync flag for the page. - ** Otherwise, when the transaction is rolled back, the logic in - ** playback_one_page() will think that the page needs to be restored - ** in the database file. And if an IO error occurs while doing so, - ** then corruption may follow. - */ - pPg->flags |= PGHDR_NEED_SYNC; - rc = write32bits(pPager->jfd, iOff, pPg->pgno); - if( rc!=SQLITE_OK ) return rc; - rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4); - if( rc!=SQLITE_OK ) return rc; - rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum); - if( rc!=SQLITE_OK ) return rc; - - IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, - pPager->journalOff, pPager->pageSize)); - PAGER_INCR(sqlite3_pager_writej_count); - PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg))); - - pPager->journalOff += 8 + pPager->pageSize; - pPager->nRec++; - assert( pPager->pInJournal!=0 ); - rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno); - testcase( rc==SQLITE_NOMEM ); - assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); - rc |= addToSavepointBitvecs(pPager, pPg->pgno); - if( rc!=SQLITE_OK ){ - assert( rc==SQLITE_NOMEM ); - return rc; - } - }else{ - if( pPager->eState!=PAGER_WRITER_DBMOD ){ - pPg->flags |= PGHDR_NEED_SYNC; - } - PAGERTRACE(("APPEND %d page %d needSync=%d\n", - PAGERID(pPager), pPg->pgno, - ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); + /* If a rollback journal is in use, them make sure the page that is about + ** to change is in the rollback journal, or if the page is a new page off + ** then end of the file, make sure it is marked as PGHDR_NEED_SYNC. + */ + assert( (pPager->pInJournal!=0) == isOpen(pPager->jfd) ); + if( pPager->pInJournal!=0 + && sqlite3BitvecTestNotNull(pPager->pInJournal, pPg->pgno)==0 + ){ + assert( pagerUseWal(pPager)==0 ); + if( pPg->pgno<=pPager->dbOrigSize ){ + rc = pagerAddPageToRollbackJournal(pPg); + if( rc!=SQLITE_OK ){ + return rc; } - } - - /* If the statement journal is open and the page is not in it, - ** then write the current page to the statement journal. Note that - ** the statement journal format differs from the standard journal format - ** in that it omits the checksums and the header. - */ - if( pPager->nSavepoint>0 && subjRequiresPage(pPg) ){ - rc = subjournalPage(pPg); + }else{ + if( pPager->eState!=PAGER_WRITER_DBMOD ){ + pPg->flags |= PGHDR_NEED_SYNC; + } + PAGERTRACE(("APPEND %d page %d needSync=%d\n", + PAGERID(pPager), pPg->pgno, + ((pPg->flags&PGHDR_NEED_SYNC)?1:0))); } } - /* Update the database size and return. + /* The PGHDR_DIRTY bit is set above when the page was added to the dirty-list + ** and before writing the page into the rollback journal. Wait until now, + ** after the page has been successfully journalled, before setting the + ** PGHDR_WRITEABLE bit that indicates that the page can be safely modified. + */ + pPg->flags |= PGHDR_WRITEABLE; + + /* If the statement journal is open and the page is not in it, + ** then write the page into the statement journal. */ + if( pPager->nSavepoint>0 ){ + rc = subjournalPageIfRequired(pPg); + } + + /* Update the database size and return. */ if( pPager->dbSize<pPg->pgno ){ pPager->dbSize = pPg->pgno; } @@ -47420,17 +48115,17 @@ static int pager_write(PgHdr *pPg){ ** a write. ** ** Usually, the sector size is less than or equal to the page size, in which -** case pages can be individually written. This routine only runs in the exceptional -** case where the page size is smaller than the sector size. +** case pages can be individually written. This routine only runs in the +** exceptional case where the page size is smaller than the sector size. */ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ - int rc = SQLITE_OK; /* Return code */ - Pgno nPageCount; /* Total number of pages in database file */ - Pgno pg1; /* First page of the sector pPg is located on. */ - int nPage = 0; /* Number of pages starting at pg1 to journal */ - int ii; /* Loop counter */ - int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ - Pager *pPager = pPg->pPager; /* The pager that owns pPg */ + int rc = SQLITE_OK; /* Return code */ + Pgno nPageCount; /* Total number of pages in database file */ + Pgno pg1; /* First page of the sector pPg is located on. */ + int nPage = 0; /* Number of pages starting at pg1 to journal */ + int ii; /* Loop counter */ + int needSync = 0; /* True if any page has PGHDR_NEED_SYNC */ + Pager *pPager = pPg->pPager; /* The pager that owns pPg */ Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize); /* Set the doNotSpill NOSYNC bit to 1. This is because we cannot allow @@ -47518,11 +48213,15 @@ static SQLITE_NOINLINE int pagerWriteLargeSector(PgHdr *pPg){ ** as appropriate. Otherwise, SQLITE_OK. */ SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ + Pager *pPager = pPg->pPager; assert( (pPg->flags & PGHDR_MMAP)==0 ); - assert( pPg->pPager->eState>=PAGER_WRITER_LOCKED ); - assert( pPg->pPager->eState!=PAGER_ERROR ); - assert( assert_pager_state(pPg->pPager) ); - if( pPg->pPager->sectorSize > (u32)pPg->pPager->pageSize ){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( pPager->eState!=PAGER_ERROR ); + assert( assert_pager_state(pPager) ); + if( (pPg->flags & PGHDR_WRITEABLE)!=0 && pPager->dbSize>=pPg->pgno ){ + if( pPager->nSavepoint ) return subjournalPageIfRequired(pPg); + return SQLITE_OK; + }else if( pPager->sectorSize > (u32)pPager->pageSize ){ return pagerWriteLargeSector(pPg); }else{ return pager_write(pPg); @@ -47536,7 +48235,7 @@ SQLITE_PRIVATE int sqlite3PagerWrite(PgHdr *pPg){ */ #ifndef NDEBUG SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){ - return pPg->flags&PGHDR_DIRTY; + return pPg->flags & PGHDR_WRITEABLE; } #endif @@ -47560,6 +48259,7 @@ SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){ PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager))); IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno)) pPg->flags |= PGHDR_DONT_WRITE; + pPg->flags &= ~PGHDR_WRITEABLE; pager_set_pagehash(pPg); } } @@ -48026,12 +48726,14 @@ SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){ return pPager->readOnly; } +#ifdef SQLITE_DEBUG /* ** Return the number of references to the pager. */ SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){ return sqlite3PcacheRefCount(pPager->pPCache); } +#endif /* ** Return the approximate number of bytes of memory currently @@ -48114,54 +48816,62 @@ SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){ ** occurs while opening the sub-journal file, then an IO error code is ** returned. Otherwise, SQLITE_OK. */ -SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ +static SQLITE_NOINLINE int pagerOpenSavepoint(Pager *pPager, int nSavepoint){ int rc = SQLITE_OK; /* Return code */ int nCurrent = pPager->nSavepoint; /* Current number of savepoints */ + int ii; /* Iterator variable */ + PagerSavepoint *aNew; /* New Pager.aSavepoint array */ assert( pPager->eState>=PAGER_WRITER_LOCKED ); assert( assert_pager_state(pPager) ); + assert( nSavepoint>nCurrent && pPager->useJournal ); - if( nSavepoint>nCurrent && pPager->useJournal ){ - int ii; /* Iterator variable */ - PagerSavepoint *aNew; /* New Pager.aSavepoint array */ + /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM + ** if the allocation fails. Otherwise, zero the new portion in case a + ** malloc failure occurs while populating it in the for(...) loop below. + */ + aNew = (PagerSavepoint *)sqlite3Realloc( + pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint + ); + if( !aNew ){ + return SQLITE_NOMEM; + } + memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); + pPager->aSavepoint = aNew; - /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM - ** if the allocation fails. Otherwise, zero the new portion in case a - ** malloc failure occurs while populating it in the for(...) loop below. - */ - aNew = (PagerSavepoint *)sqlite3Realloc( - pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint - ); - if( !aNew ){ + /* Populate the PagerSavepoint structures just allocated. */ + for(ii=nCurrent; ii<nSavepoint; ii++){ + aNew[ii].nOrig = pPager->dbSize; + if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ + aNew[ii].iOffset = pPager->journalOff; + }else{ + aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); + } + aNew[ii].iSubRec = pPager->nSubRec; + aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); + if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } - memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint)); - pPager->aSavepoint = aNew; - - /* Populate the PagerSavepoint structures just allocated. */ - for(ii=nCurrent; ii<nSavepoint; ii++){ - aNew[ii].nOrig = pPager->dbSize; - if( isOpen(pPager->jfd) && pPager->journalOff>0 ){ - aNew[ii].iOffset = pPager->journalOff; - }else{ - aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager); - } - aNew[ii].iSubRec = pPager->nSubRec; - aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize); - if( !aNew[ii].pInSavepoint ){ - return SQLITE_NOMEM; - } - if( pagerUseWal(pPager) ){ - sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); - } - pPager->nSavepoint = ii+1; + if( pagerUseWal(pPager) ){ + sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData); } - assert( pPager->nSavepoint==nSavepoint ); - assertTruncateConstraint(pPager); + pPager->nSavepoint = ii+1; } - + assert( pPager->nSavepoint==nSavepoint ); + assertTruncateConstraint(pPager); return rc; } +SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ + assert( pPager->eState>=PAGER_WRITER_LOCKED ); + assert( assert_pager_state(pPager) ); + + if( nSavepoint>pPager->nSavepoint && pPager->useJournal ){ + return pagerOpenSavepoint(pPager, nSavepoint); + }else{ + return SQLITE_OK; + } +} + /* ** This function is called to rollback or release (commit) a savepoint. @@ -48392,9 +49102,8 @@ SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, i ** one or more savepoint bitvecs. This is the reason this function ** may return SQLITE_NOMEM. */ - if( pPg->flags&PGHDR_DIRTY - && subjRequiresPage(pPg) - && SQLITE_OK!=(rc = subjournalPage(pPg)) + if( (pPg->flags & PGHDR_DIRTY)!=0 + && SQLITE_OK!=(rc = subjournalPageIfRequired(pPg)) ){ return rc; } @@ -49145,6 +49854,7 @@ SQLITE_PRIVATE int sqlite3PagerWalFramesize(Pager *pPager){ */ #ifndef SQLITE_OMIT_WAL +/* #include "wal.h" */ /* ** Trace output macros @@ -49550,9 +50260,9 @@ static void walIndexWriteHdr(Wal *pWal){ pWal->hdr.isInit = 1; pWal->hdr.iVersion = WALINDEX_MAX_VERSION; walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum); - memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); walShmBarrier(pWal); - memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr)); + memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr)); } /* @@ -49854,13 +50564,13 @@ static void walCleanupHash(Wal *pWal){ ** via the hash table even after the cleanup. */ if( iLimit ){ - int i; /* Loop counter */ + int j; /* Loop counter */ int iKey; /* Hash key */ - for(i=1; i<=iLimit; i++){ - for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){ - if( aHash[iKey]==i ) break; + for(j=1; j<=iLimit; j++){ + for(iKey=walHash(aPgno[j]); aHash[iKey]; iKey=walNextHash(iKey)){ + if( aHash[iKey]==j ) break; } - assert( aHash[iKey]==i ); + assert( aHash[iKey]==j ); } } #endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */ @@ -50362,7 +51072,7 @@ static void walMergesort( int nMerge = 0; /* Number of elements in list aMerge */ ht_slot *aMerge = 0; /* List to be merged */ int iList; /* Index into input list */ - int iSub = 0; /* Index into aSub array */ + u32 iSub = 0; /* Index into aSub array */ struct Sublist aSub[13]; /* Array of sub-lists */ memset(aSub, 0, sizeof(aSub)); @@ -50373,7 +51083,9 @@ static void walMergesort( nMerge = 1; aMerge = &aList[iList]; for(iSub=0; iList & (1<<iSub); iSub++){ - struct Sublist *p = &aSub[iSub]; + struct Sublist *p; + assert( iSub<ArraySize(aSub) ); + p = &aSub[iSub]; assert( p->aList && p->nList<=(1<<iSub) ); assert( p->aList==&aList[iList&~((2<<iSub)-1)] ); walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); @@ -50384,7 +51096,9 @@ static void walMergesort( for(iSub++; iSub<ArraySize(aSub); iSub++){ if( nList & (1<<iSub) ){ - struct Sublist *p = &aSub[iSub]; + struct Sublist *p; + assert( iSub<ArraySize(aSub) ); + p = &aSub[iSub]; assert( p->nList<=(1<<iSub) ); assert( p->aList==&aList[nList&~((2<<iSub)-1)] ); walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer); @@ -52295,6 +53009,7 @@ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ ** 4 Number of leaf pointers on this page ** * zero or more pages numbers of leaves */ +/* #include "sqliteInt.h" */ /* The following value is the maximum cell size assuming a maximum page @@ -52312,6 +53027,7 @@ SQLITE_PRIVATE int sqlite3WalFramesize(Wal *pWal){ /* Forward declarations */ typedef struct MemPage MemPage; typedef struct BtLock BtLock; +typedef struct CellInfo CellInfo; /* ** This is a magic string that appears at the beginning of every @@ -52375,7 +53091,10 @@ struct MemPage { u8 *aData; /* Pointer to disk image of the page data */ u8 *aDataEnd; /* One byte past the end of usable data */ u8 *aCellIdx; /* The cell index area */ + u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */ DbPage *pDbPage; /* Pager page handle */ + u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */ + void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */ Pgno pgno; /* Page number for this page */ }; @@ -52431,6 +53150,7 @@ struct Btree { u8 inTrans; /* TRANS_NONE, TRANS_READ or TRANS_WRITE */ u8 sharable; /* True if we can share pBt with another db */ u8 locked; /* True if db currently has pBt locked */ + u8 hasIncrblobCur; /* True if there are one or more Incrblob cursors */ int wantToLock; /* Number of nested calls to sqlite3BtreeEnter() */ int nBackup; /* Number of backup operations reading this btree */ u32 iDataVersion; /* Combines with pBt->pPager->iDataVersion */ @@ -52541,7 +53261,6 @@ struct BtShared { ** about a cell. The parseCellPtr() function fills in this structure ** based on information extract from the raw disk page. */ -typedef struct CellInfo CellInfo; struct CellInfo { i64 nKey; /* The key for INTKEY tables, or nPayload otherwise */ u8 *pPayload; /* Pointer to the start of payload */ @@ -52584,8 +53303,7 @@ struct CellInfo { struct BtCursor { Btree *pBtree; /* The Btree to which this cursor belongs */ BtShared *pBt; /* The BtShared this cursor points to */ - BtCursor *pNext, *pPrev; /* Forms a linked list of all cursors */ - struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + BtCursor *pNext; /* Forms a linked list of all cursors */ Pgno *aOverflow; /* Cache of overflow page locations */ CellInfo info; /* A parse of the cell we are pointing at */ i64 nKey; /* Size of pKey, or last integer key */ @@ -52595,9 +53313,16 @@ struct BtCursor { int skipNext; /* Prev() is noop if negative. Next() is noop if positive. ** Error code if eState==CURSOR_FAULT */ u8 curFlags; /* zero or more BTCF_* flags defined below */ + u8 curPagerFlags; /* Flags to send to sqlite3PagerAcquire() */ u8 eState; /* One of the CURSOR_XXX constants (see below) */ - u8 hints; /* As configured by CursorSetHints() */ - i16 iPage; /* Index of current page in apPage */ + u8 hints; /* As configured by CursorSetHints() */ + /* All fields above are zeroed when the cursor is allocated. See + ** sqlite3BtreeCursorZero(). Fields that follow must be manually + ** initialized. */ + i8 iPage; /* Index of current page in apPage */ + u8 curIntKey; /* Value of apPage[0]->intKey */ + struct KeyInfo *pKeyInfo; /* Argument passed to comparison function */ + void *padding1; /* Make object size a multiple of 16 */ u16 aiIdx[BTCURSOR_MAX_DEPTH]; /* Current index in apPage[i] */ MemPage *apPage[BTCURSOR_MAX_DEPTH]; /* Pages from root to current page */ }; @@ -52610,6 +53335,7 @@ struct BtCursor { #define BTCF_ValidOvfl 0x04 /* True if aOverflow is valid */ #define BTCF_AtLast 0x08 /* Cursor is pointing ot the last entry */ #define BTCF_Incrblob 0x10 /* True if an incremental I/O handle */ +#define BTCF_Multiple 0x20 /* Maybe another cursor on the same btree */ /* ** Potential values for BtCursor.eState. @@ -52752,6 +53478,7 @@ struct IntegrityCk { const char *zPfx; /* Error message prefix */ int v1, v2; /* Values for up to two %d fields in zPfx */ StrAccum errMsg; /* Accumulate the error message text here */ + u32 *heap; /* Min-heap used for analyzing cell coverage */ }; /* @@ -52762,6 +53489,21 @@ struct IntegrityCk { #define get4byte sqlite3Get4byte #define put4byte sqlite3Put4byte +/* +** get2byteAligned(), unlike get2byte(), requires that its argument point to a +** two-byte aligned address. get2bytea() is only used for accessing the +** cell addresses in a btree header. +*/ +#if SQLITE_BYTEORDER==4321 +# define get2byteAligned(x) (*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && GCC_VERSION>=4008000 +# define get2byteAligned(x) __builtin_bswap16(*(u16*)(x)) +#elif SQLITE_BYTEORDER==1234 && defined(_MSC_VER) && _MSC_VER>=1300 +# define get2byteAligned(x) _byteswap_ushort(*(u16*)(x)) +#else +# define get2byteAligned(x) ((x)[0]<<8 | (x)[1]) +#endif + /************** End of btreeInt.h ********************************************/ /************** Continuing where we left off in btmutex.c ********************/ #ifndef SQLITE_OMIT_SHARED_CACHE @@ -53065,6 +53807,7 @@ SQLITE_PRIVATE void sqlite3BtreeEnterAll(sqlite3 *db){ ** See the header comment on "btreeInt.h" for additional information. ** Including a description of file format and an overview of operation. */ +/* #include "btreeInt.h" */ /* ** The header string that appears at the beginning of every @@ -53541,13 +54284,15 @@ static void invalidateIncrblobCursors( int isClearTable /* True if all rows are being deleted */ ){ BtCursor *p; - BtShared *pBt = pBtree->pBt; + if( pBtree->hasIncrblobCur==0 ) return; assert( sqlite3BtreeHoldsMutex(pBtree) ); - for(p=pBt->pCursor; p; p=p->pNext){ - if( (p->curFlags & BTCF_Incrblob)!=0 - && (isClearTable || p->info.nKey==iRow) - ){ - p->eState = CURSOR_INVALID; + pBtree->hasIncrblobCur = 0; + for(p=pBtree->pBt->pCursor; p; p=p->pNext){ + if( (p->curFlags & BTCF_Incrblob)!=0 ){ + pBtree->hasIncrblobCur = 1; + if( isClearTable || p->info.nKey==iRow ){ + p->eState = CURSOR_INVALID; + } } } } @@ -53669,7 +54414,7 @@ static int saveCursorPosition(BtCursor *pCur){ ** table, then malloc space for and store the pCur->nKey bytes of key ** data. */ - if( 0==pCur->apPage[0]->intKey ){ + if( 0==pCur->curIntKey ){ void *pKey = sqlite3Malloc( pCur->nKey ); if( pKey ){ rc = sqlite3BtreeKey(pCur, 0, (int)pCur->nKey, pKey); @@ -53682,7 +54427,7 @@ static int saveCursorPosition(BtCursor *pCur){ rc = SQLITE_NOMEM; } } - assert( !pCur->apPage[0]->intKey || !pCur->pKey ); + assert( !pCur->curIntKey || !pCur->pKey ); if( rc==SQLITE_OK ){ btreeReleaseAllCursorPages(pCur); @@ -53704,6 +54449,15 @@ static int SQLITE_NOINLINE saveCursorsOnList(BtCursor*,Pgno,BtCursor*); ** routine is called just before cursor pExcept is used to modify the ** table, for example in BtreeDelete() or BtreeInsert(). ** +** If there are two or more cursors on the same btree, then all such +** cursors should have their BTCF_Multiple flag set. The btreeCursor() +** routine enforces that rule. This routine only needs to be called in +** the uncommon case when pExpect has the BTCF_Multiple flag set. +** +** If pExpect!=NULL and if no other cursors are found on the same root-page, +** then the BTCF_Multiple flag on pExpect is cleared, to avoid another +** pointless call to this routine. +** ** Implementation note: This routine merely checks to see if any cursors ** need to be saved. It calls out to saveCursorsOnList() in the (unusual) ** event that cursors are in need to being saved. @@ -53715,7 +54469,9 @@ static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){ for(p=pBt->pCursor; p; p=p->pNext){ if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) ) break; } - return p ? saveCursorsOnList(p, iRoot, pExcept) : SQLITE_OK; + if( p ) return saveCursorsOnList(p, iRoot, pExcept); + if( pExcept ) pExcept->curFlags &= ~BTCF_Multiple; + return SQLITE_OK; } /* This helper routine to saveAllCursors does the actual work of saving @@ -54003,39 +54759,88 @@ static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){ ** the page, 1 means the second cell, and so forth) return a pointer ** to the cell content. ** +** findCellPastPtr() does the same except it skips past the initial +** 4-byte child pointer found on interior pages, if there is one. +** ** This routine works only for pages that do not contain overflow cells. */ #define findCell(P,I) \ - ((P)->aData + ((P)->maskPage & get2byte(&(P)->aCellIdx[2*(I)]))) -#define findCellv2(D,M,O,I) (D+(M&get2byte(D+(O+2*(I))))) + ((P)->aData + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) +#define findCellPastPtr(P,I) \ + ((P)->aDataOfst + ((P)->maskPage & get2byteAligned(&(P)->aCellIdx[2*(I)]))) /* -** This a more complex version of findCell() that works for -** pages that do contain overflow cells. +** This is common tail processing for btreeParseCellPtr() and +** btreeParseCellPtrIndex() for the case when the cell does not fit entirely +** on a single B-tree page. Make necessary adjustments to the CellInfo +** structure. */ -static u8 *findOverflowCell(MemPage *pPage, int iCell){ - int i; - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - for(i=pPage->nOverflow-1; i>=0; i--){ - int k; - k = pPage->aiOvfl[i]; - if( k<=iCell ){ - if( k==iCell ){ - return pPage->apOvfl[i]; - } - iCell--; - } +static SQLITE_NOINLINE void btreeParseCellAdjustSizeForOverflow( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + /* If the payload will not fit completely on the local page, we have + ** to decide how much to store locally and how much to spill onto + ** overflow pages. The strategy is to minimize the amount of unused + ** space on overflow pages while keeping the amount of local storage + ** in between minLocal and maxLocal. + ** + ** Warning: changing the way overflow payload is distributed in any + ** way will result in an incompatible file format. + */ + int minLocal; /* Minimum amount of payload held locally */ + int maxLocal; /* Maximum amount of payload held locally */ + int surplus; /* Overflow payload available for local storage */ + + minLocal = pPage->minLocal; + maxLocal = pPage->maxLocal; + surplus = minLocal + (pInfo->nPayload - minLocal)%(pPage->pBt->usableSize-4); + testcase( surplus==maxLocal ); + testcase( surplus==maxLocal+1 ); + if( surplus <= maxLocal ){ + pInfo->nLocal = (u16)surplus; + }else{ + pInfo->nLocal = (u16)minLocal; } - return findCell(pPage, iCell); + pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); + pInfo->nSize = pInfo->iOverflow + 4; } /* -** Parse a cell content block and fill in the CellInfo structure. There -** are two versions of this function. btreeParseCell() takes a -** cell index as the second argument and btreeParseCellPtr() -** takes a pointer to the body of the cell as its second argument. +** The following routines are implementations of the MemPage.xParseCell() +** method. +** +** Parse a cell content block and fill in the CellInfo structure. +** +** btreeParseCellPtr() => table btree leaf nodes +** btreeParseCellNoPayload() => table btree internal nodes +** btreeParseCellPtrIndex() => index btree nodes +** +** There is also a wrapper function btreeParseCell() that works for +** all MemPage types and that references the cell by index rather than +** by pointer. */ +static void btreeParseCellPtrNoPayload( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 ); + assert( pPage->noPayload ); + assert( pPage->childPtrSize==4 ); +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER(pPage); +#endif + pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); + pInfo->nPayload = 0; + pInfo->nLocal = 0; + pInfo->iOverflow = 0; + pInfo->pPayload = 0; + return; +} static void btreeParseCellPtr( MemPage *pPage, /* Page containing the cell */ u8 *pCell, /* Pointer to the cell text. */ @@ -54043,26 +54848,93 @@ static void btreeParseCellPtr( ){ u8 *pIter; /* For scanning through pCell */ u32 nPayload; /* Number of bytes of cell payload */ + u64 iKey; /* Extracted Key value */ assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->leaf==0 || pPage->leaf==1 ); - if( pPage->intKeyLeaf ){ - assert( pPage->childPtrSize==0 ); - pIter = pCell + getVarint32(pCell, nPayload); - pIter += getVarint(pIter, (u64*)&pInfo->nKey); - }else if( pPage->noPayload ){ - assert( pPage->childPtrSize==4 ); - pInfo->nSize = 4 + getVarint(&pCell[4], (u64*)&pInfo->nKey); - pInfo->nPayload = 0; - pInfo->nLocal = 0; + assert( pPage->intKeyLeaf || pPage->noPayload ); + assert( pPage->noPayload==0 ); + assert( pPage->intKeyLeaf ); + assert( pPage->childPtrSize==0 ); + pIter = pCell; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint32(pIter, nPayload); + ** + ** The code is inlined to avoid a function call. + */ + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( (*pIter)>=0x80 && pIter<pEnd ); + } + pIter++; + + /* The next block of code is equivalent to: + ** + ** pIter += getVarint(pIter, (u64*)&pInfo->nKey); + ** + ** The code is inlined to avoid a function call. + */ + iKey = *pIter; + if( iKey>=0x80 ){ + u8 *pEnd = &pIter[7]; + iKey &= 0x7f; + while(1){ + iKey = (iKey<<7) | (*++pIter & 0x7f); + if( (*pIter)<0x80 ) break; + if( pIter>=pEnd ){ + iKey = (iKey<<8) | *++pIter; + break; + } + } + } + pIter++; + + pInfo->nKey = *(i64*)&iKey; + pInfo->nPayload = nPayload; + pInfo->pPayload = pIter; + testcase( nPayload==pPage->maxLocal ); + testcase( nPayload==pPage->maxLocal+1 ); + if( nPayload<=pPage->maxLocal ){ + /* This is the (easy) common case where the entire payload fits + ** on the local page. No overflow is required. + */ + pInfo->nSize = nPayload + (u16)(pIter - pCell); + if( pInfo->nSize<4 ) pInfo->nSize = 4; + pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; - pInfo->pPayload = 0; - return; }else{ - pIter = pCell + pPage->childPtrSize; - pIter += getVarint32(pIter, nPayload); - pInfo->nKey = nPayload; + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); } +} +static void btreeParseCellPtrIndex( + MemPage *pPage, /* Page containing the cell */ + u8 *pCell, /* Pointer to the cell text. */ + CellInfo *pInfo /* Fill in this structure */ +){ + u8 *pIter; /* For scanning through pCell */ + u32 nPayload; /* Number of bytes of cell payload */ + + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + assert( pPage->leaf==0 || pPage->leaf==1 ); + assert( pPage->intKeyLeaf==0 ); + assert( pPage->noPayload==0 ); + pIter = pCell + pPage->childPtrSize; + nPayload = *pIter; + if( nPayload>=0x80 ){ + u8 *pEnd = &pIter[8]; + nPayload &= 0x7f; + do{ + nPayload = (nPayload<<7) | (*++pIter & 0x7f); + }while( *(pIter)>=0x80 && pIter<pEnd ); + } + pIter++; + pInfo->nKey = nPayload; pInfo->nPayload = nPayload; pInfo->pPayload = pIter; testcase( nPayload==pPage->maxLocal ); @@ -54076,31 +54948,7 @@ static void btreeParseCellPtr( pInfo->nLocal = (u16)nPayload; pInfo->iOverflow = 0; }else{ - /* If the payload will not fit completely on the local page, we have - ** to decide how much to store locally and how much to spill onto - ** overflow pages. The strategy is to minimize the amount of unused - ** space on overflow pages while keeping the amount of local storage - ** in between minLocal and maxLocal. - ** - ** Warning: changing the way overflow payload is distributed in any - ** way will result in an incompatible file format. - */ - int minLocal; /* Minimum amount of payload held locally */ - int maxLocal; /* Maximum amount of payload held locally */ - int surplus; /* Overflow payload available for local storage */ - - minLocal = pPage->minLocal; - maxLocal = pPage->maxLocal; - surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4); - testcase( surplus==maxLocal ); - testcase( surplus==maxLocal+1 ); - if( surplus <= maxLocal ){ - pInfo->nLocal = (u16)surplus; - }else{ - pInfo->nLocal = (u16)minLocal; - } - pInfo->iOverflow = (u16)(&pInfo->pPayload[pInfo->nLocal] - pCell); - pInfo->nSize = pInfo->iOverflow + 4; + btreeParseCellAdjustSizeForOverflow(pPage, pCell, pInfo); } } static void btreeParseCell( @@ -54108,14 +54956,20 @@ static void btreeParseCell( int iCell, /* The cell index. First cell is 0 */ CellInfo *pInfo /* Fill in this structure */ ){ - btreeParseCellPtr(pPage, findCell(pPage, iCell), pInfo); + pPage->xParseCell(pPage, findCell(pPage, iCell), pInfo); } /* +** The following routines are implementations of the MemPage.xCellSize +** method. +** ** Compute the total number of bytes that a Cell needs in the cell ** data area of the btree-page. The return number includes the cell ** data header and the local payload, but not any overflow page or ** the space used by the cell pointer. +** +** cellSizePtrNoPayload() => table internal nodes +** cellSizePtr() => all index nodes & table leaf nodes */ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ u8 *pIter = pCell + pPage->childPtrSize; /* For looping over bytes of pCell */ @@ -54128,18 +54982,13 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of ** this function verifies that this invariant is not violated. */ CellInfo debuginfo; - btreeParseCellPtr(pPage, pCell, &debuginfo); + pPage->xParseCell(pPage, pCell, &debuginfo); #endif - if( pPage->noPayload ){ - pEnd = &pIter[9]; - while( (*pIter++)&0x80 && pIter<pEnd ); - assert( pPage->childPtrSize==4 ); - return (u16)(pIter - pCell); - } + assert( pPage->noPayload==0 ); nSize = *pIter; if( nSize>=0x80 ){ - pEnd = &pIter[9]; + pEnd = &pIter[8]; nSize &= 0x7f; do{ nSize = (nSize<<7) | (*++pIter & 0x7f); @@ -54171,12 +55020,34 @@ static u16 cellSizePtr(MemPage *pPage, u8 *pCell){ assert( nSize==debuginfo.nSize || CORRUPT_DB ); return (u16)nSize; } +static u16 cellSizePtrNoPayload(MemPage *pPage, u8 *pCell){ + u8 *pIter = pCell + 4; /* For looping over bytes of pCell */ + u8 *pEnd; /* End mark for a varint */ + +#ifdef SQLITE_DEBUG + /* The value returned by this function should always be the same as + ** the (CellInfo.nSize) value found by doing a full parse of the + ** cell. If SQLITE_DEBUG is defined, an assert() at the bottom of + ** this function verifies that this invariant is not violated. */ + CellInfo debuginfo; + pPage->xParseCell(pPage, pCell, &debuginfo); +#else + UNUSED_PARAMETER(pPage); +#endif + + assert( pPage->childPtrSize==4 ); + pEnd = pIter + 9; + while( (*pIter++)&0x80 && pIter<pEnd ); + assert( debuginfo.nSize==(u16)(pIter - pCell) || CORRUPT_DB ); + return (u16)(pIter - pCell); +} + #ifdef SQLITE_DEBUG /* This variation on cellSizePtr() is used inside of assert() statements ** only. */ static u16 cellSize(MemPage *pPage, int iCell){ - return cellSizePtr(pPage, findCell(pPage, iCell)); + return pPage->xCellSize(pPage, findCell(pPage, iCell)); } #endif @@ -54190,7 +55061,7 @@ static void ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell, int *pRC){ CellInfo info; if( *pRC ) return; assert( pCell!=0 ); - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&pCell[info.iOverflow]); ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, pRC); @@ -54247,26 +55118,18 @@ static int defragmentPage(MemPage *pPage){ pc = get2byte(pAddr); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); -#if !defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) /* These conditions have already been verified in btreeInitPage() - ** if SQLITE_ENABLE_OVERSIZE_CELL_CHECK is defined + ** if PRAGMA cell_size_check=ON. */ if( pc<iCellFirst || pc>iCellLast ){ return SQLITE_CORRUPT_BKPT; } -#endif assert( pc>=iCellFirst && pc<=iCellLast ); - size = cellSizePtr(pPage, &src[pc]); + size = pPage->xCellSize(pPage, &src[pc]); cbrk -= size; -#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) - if( cbrk<iCellFirst ){ - return SQLITE_CORRUPT_BKPT; - } -#else if( cbrk<iCellFirst || pc+size>usableSize ){ return SQLITE_CORRUPT_BKPT; } -#endif assert( cbrk+size<=usableSize && cbrk>=iCellFirst ); testcase( cbrk+size==usableSize ); testcase( pc+size==usableSize ); @@ -54304,18 +55167,20 @@ static int defragmentPage(MemPage *pPage){ ** This function may detect corruption within pPg. If corruption is ** detected then *pRc is set to SQLITE_CORRUPT and NULL is returned. ** -** If a slot of at least nByte bytes is found but cannot be used because -** there are already at least 60 fragmented bytes on the page, return NULL. -** In this case, if pbDefrag parameter is not NULL, set *pbDefrag to true. +** Slots on the free list that are between 1 and 3 bytes larger than nByte +** will be ignored if adding the extra space to the fragmentation count +** causes the fragmentation count to exceed 60. */ -static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ +static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc){ const int hdr = pPg->hdrOffset; u8 * const aData = pPg->aData; - int iAddr; - int pc; + int iAddr = hdr + 1; + int pc = get2byte(&aData[iAddr]); + int x; int usableSize = pPg->pBt->usableSize; - for(iAddr=hdr+1; (pc = get2byte(&aData[iAddr]))>0; iAddr=pc){ + assert( pc>0 ); + do{ int size; /* Size of the free slot */ /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ @@ -54327,24 +55192,21 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ ** freeblock form a big-endian integer which is the size of the freeblock ** in bytes, including the 4-byte header. */ size = get2byte(&aData[pc+2]); - if( size>=nByte ){ - int x = size - nByte; + if( (x = size - nByte)>=0 ){ testcase( x==4 ); testcase( x==3 ); - if( x<4 ){ + if( pc < pPg->cellOffset+2*pPg->nCell || size+pc > usableSize ){ + *pRc = SQLITE_CORRUPT_BKPT; + return 0; + }else if( x<4 ){ /* EVIDENCE-OF: R-11498-58022 In a well-formed b-tree page, the total ** number of bytes in fragments may not exceed 60. */ - if( aData[hdr+7]>=60 ){ - if( pbDefrag ) *pbDefrag = 1; - return 0; - } + if( aData[hdr+7]>57 ) return 0; + /* Remove the slot from the free-list. Update the number of ** fragmented bytes within the page. */ memcpy(&aData[iAddr], &aData[pc], 2); aData[hdr+7] += (u8)x; - }else if( size+pc > usableSize ){ - *pRc = SQLITE_CORRUPT_BKPT; - return 0; }else{ /* The slot remains on the free-list. Reduce its size to account ** for the portion used by the new allocation. */ @@ -54352,7 +55214,9 @@ static u8 *pageFindSlot(MemPage *pPg, int nByte, int *pRc, int *pbDefrag){ } return &aData[pc + x]; } - } + iAddr = pc; + pc = get2byte(&aData[pc]); + }while( pc ); return 0; } @@ -54393,8 +55257,15 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ ** then the cell content offset of an empty page wants to be 65536. ** However, that integer is too large to be stored in a 2-byte unsigned ** integer, so a value of 0 is used in its place. */ - top = get2byteNotZero(&data[hdr+5]); - if( gap>top ) return SQLITE_CORRUPT_BKPT; + top = get2byte(&data[hdr+5]); + assert( top<=(int)pPage->pBt->usableSize ); /* Prevent by getAndInitPage() */ + if( gap>top ){ + if( top==0 && pPage->pBt->usableSize==65536 ){ + top = 65536; + }else{ + return SQLITE_CORRUPT_BKPT; + } + } /* If there is enough space between gap and top for one more cell pointer ** array entry offset, and if the freelist is not empty, then search the @@ -54403,15 +55274,14 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ testcase( gap+2==top ); testcase( gap+1==top ); testcase( gap==top ); - if( gap+2<=top && (data[hdr+1] || data[hdr+2]) ){ - int bDefrag = 0; - u8 *pSpace = pageFindSlot(pPage, nByte, &rc, &bDefrag); - if( rc ) return rc; - if( bDefrag ) goto defragment_page; + if( (data[hdr+2] || data[hdr+1]) && gap+2<=top ){ + u8 *pSpace = pageFindSlot(pPage, nByte, &rc); if( pSpace ){ assert( pSpace>=data && (pSpace - data)<65536 ); *pIdx = (int)(pSpace - data); return SQLITE_OK; + }else if( rc ){ + return rc; } } @@ -54420,7 +55290,6 @@ static int allocateSpace(MemPage *pPage, int nByte, int *pIdx){ */ testcase( gap+2+nByte==top ); if( gap+2+nByte>top ){ - defragment_page: assert( pPage->nCell>0 || CORRUPT_DB ); rc = defragmentPage(pPage); if( rc ) return rc; @@ -54467,7 +55336,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ assert( pPage->pBt!=0 ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); - assert( iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); + assert( CORRUPT_DB || iStart>=pPage->hdrOffset+6+pPage->childPtrSize ); assert( CORRUPT_DB || iEnd <= pPage->pBt->usableSize ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( iSize>=4 ); /* Minimum cell size is 4 */ @@ -54496,7 +55365,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ /* At this point: ** iFreeBlk: First freeblock after iStart, or zero if none - ** iPtr: The address of a pointer iFreeBlk + ** iPtr: The address of a pointer to iFreeBlk ** ** Check to see if iFreeBlk should be coalesced onto the end of iStart. */ @@ -54504,6 +55373,7 @@ static int freeSpace(MemPage *pPage, u16 iStart, u16 iSize){ nFrag = iFreeBlk - iEnd; if( iEnd>iFreeBlk ) return SQLITE_CORRUPT_BKPT; iEnd = iFreeBlk + get2byte(&data[iFreeBlk+2]); + if( iEnd > pPage->pBt->usableSize ) return SQLITE_CORRUPT_BKPT; iSize = iEnd - iStart; iFreeBlk = get2byte(&data[iFreeBlk]); } @@ -54561,6 +55431,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ pPage->leaf = (u8)(flagByte>>3); assert( PTF_LEAF == 1<<3 ); flagByte &= ~PTF_LEAF; pPage->childPtrSize = 4-4*pPage->leaf; + pPage->xCellSize = cellSizePtr; pBt = pPage->pBt; if( flagByte==(PTF_LEAFDATA | PTF_INTKEY) ){ /* EVIDENCE-OF: R-03640-13415 A value of 5 means the page is an interior @@ -54570,8 +55441,16 @@ static int decodeFlags(MemPage *pPage, int flagByte){ ** table b-tree page. */ assert( (PTF_LEAFDATA|PTF_INTKEY|PTF_LEAF)==13 ); pPage->intKey = 1; - pPage->intKeyLeaf = pPage->leaf; - pPage->noPayload = !pPage->leaf; + if( pPage->leaf ){ + pPage->intKeyLeaf = 1; + pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtr; + }else{ + pPage->intKeyLeaf = 0; + pPage->noPayload = 1; + pPage->xCellSize = cellSizePtrNoPayload; + pPage->xParseCell = btreeParseCellPtrNoPayload; + } pPage->maxLocal = pBt->maxLeaf; pPage->minLocal = pBt->minLeaf; }else if( flagByte==PTF_ZERODATA ){ @@ -54584,6 +55463,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ pPage->intKey = 0; pPage->intKeyLeaf = 0; pPage->noPayload = 0; + pPage->xParseCell = btreeParseCellPtrIndex; pPage->maxLocal = pBt->maxLocal; pPage->minLocal = pBt->minLocal; }else{ @@ -54607,6 +55487,7 @@ static int decodeFlags(MemPage *pPage, int flagByte){ static int btreeInitPage(MemPage *pPage){ assert( pPage->pBt!=0 ); + assert( pPage->pBt->db!=0 ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); assert( pPage->pgno==sqlite3PagerPagenumber(pPage->pDbPage) ); assert( pPage == sqlite3PagerGetExtra(pPage->pDbPage) ); @@ -54638,6 +55519,7 @@ static int btreeInitPage(MemPage *pPage){ pPage->cellOffset = cellOffset = hdr + 8 + pPage->childPtrSize; pPage->aDataEnd = &data[usableSize]; pPage->aCellIdx = &data[cellOffset]; + pPage->aDataOfst = &data[pPage->childPtrSize]; /* EVIDENCE-OF: R-58015-48175 The two-byte integer at offset 5 designates ** the start of the cell content area. A zero value for this integer is ** interpreted as 65536. */ @@ -54665,20 +55547,19 @@ static int btreeInitPage(MemPage *pPage){ */ iCellFirst = cellOffset + 2*pPage->nCell; iCellLast = usableSize - 4; -#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) - { + if( pBt->db->flags & SQLITE_CellSizeCk ){ int i; /* Index into the cell pointer array */ int sz; /* Size of a cell */ if( !pPage->leaf ) iCellLast--; for(i=0; i<pPage->nCell; i++){ - pc = get2byte(&data[cellOffset+i*2]); + pc = get2byteAligned(&data[cellOffset+i*2]); testcase( pc==iCellFirst ); testcase( pc==iCellLast ); if( pc<iCellFirst || pc>iCellLast ){ return SQLITE_CORRUPT_BKPT; } - sz = cellSizePtr(pPage, &data[pc]); + sz = pPage->xCellSize(pPage, &data[pc]); testcase( pc+sz==usableSize ); if( pc+sz>usableSize ){ return SQLITE_CORRUPT_BKPT; @@ -54686,7 +55567,6 @@ static int btreeInitPage(MemPage *pPage){ } if( !pPage->leaf ) iCellLast++; } -#endif /* Compute the total free space on the page ** EVIDENCE-OF: R-23588-34450 The two-byte integer at offset 1 gives the @@ -54759,6 +55639,7 @@ static void zeroPage(MemPage *pPage, int flags){ pPage->cellOffset = first; pPage->aDataEnd = &data[pBt->usableSize]; pPage->aCellIdx = &data[first]; + pPage->aDataOfst = &data[pPage->childPtrSize]; pPage->nOverflow = 0; assert( pBt->pageSize>=512 && pBt->pageSize<=65536 ); pPage->maskPage = (u16)(pBt->pageSize - 1); @@ -54777,16 +55658,16 @@ static MemPage *btreePageFromDbPage(DbPage *pDbPage, Pgno pgno, BtShared *pBt){ pPage->pDbPage = pDbPage; pPage->pBt = pBt; pPage->pgno = pgno; - pPage->hdrOffset = pPage->pgno==1 ? 100 : 0; + pPage->hdrOffset = pgno==1 ? 100 : 0; return pPage; } /* ** Get a page from the pager. Initialize the MemPage.pBt and -** MemPage.aData elements if needed. +** MemPage.aData elements if needed. See also: btreeGetUnusedPage(). ** -** If the noContent flag is set, it means that we do not care about -** the content of the page at this time. So do not go to the disk +** If the PAGER_GET_NOCONTENT flag is set, it means that we do not care +** about the content of the page at this time. So do not go to the disk ** to fetch the content. Just fill in the content with zeros for now. ** If in the future we call sqlite3PagerWrite() on this page, that ** means we have started to be concerned about content and the disk @@ -54838,35 +55719,62 @@ SQLITE_PRIVATE u32 sqlite3BtreeLastPage(Btree *p){ } /* -** Get a page from the pager and initialize it. This routine is just a -** convenience wrapper around separate calls to btreeGetPage() and -** btreeInitPage(). +** Get a page from the pager and initialize it. +** +** If pCur!=0 then the page is being fetched as part of a moveToChild() +** call. Do additional sanity checking on the page in this case. +** And if the fetch fails, this routine must decrement pCur->iPage. ** -** If an error occurs, then the value *ppPage is set to is undefined. It +** The page is fetched as read-write unless pCur is not NULL and is +** a read-only cursor. +** +** If an error occurs, then *ppPage is undefined. It ** may remain unchanged, or it may be set to an invalid value. */ static int getAndInitPage( BtShared *pBt, /* The database file */ Pgno pgno, /* Number of the page to get */ MemPage **ppPage, /* Write the page pointer here */ - int bReadonly /* PAGER_GET_READONLY or 0 */ + BtCursor *pCur, /* Cursor to receive the page, or NULL */ + int bReadOnly /* True for a read-only page */ ){ int rc; + DbPage *pDbPage; assert( sqlite3_mutex_held(pBt->mutex) ); - assert( bReadonly==PAGER_GET_READONLY || bReadonly==0 ); + assert( pCur==0 || ppPage==&pCur->apPage[pCur->iPage] ); + assert( pCur==0 || bReadOnly==pCur->curPagerFlags ); + assert( pCur==0 || pCur->iPage>0 ); if( pgno>btreePagecount(pBt) ){ rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = btreeGetPage(pBt, pgno, ppPage, bReadonly); - if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ - rc = btreeInitPage(*ppPage); - if( rc!=SQLITE_OK ){ - releasePage(*ppPage); - } + goto getAndInitPage_error; + } + rc = sqlite3PagerAcquire(pBt->pPager, pgno, (DbPage**)&pDbPage, bReadOnly); + if( rc ){ + goto getAndInitPage_error; + } + *ppPage = btreePageFromDbPage(pDbPage, pgno, pBt); + if( (*ppPage)->isInit==0 ){ + rc = btreeInitPage(*ppPage); + if( rc!=SQLITE_OK ){ + releasePage(*ppPage); + goto getAndInitPage_error; } } + /* If obtaining a child page for a cursor, we must verify that the page is + ** compatible with the root page. */ + if( pCur + && ((*ppPage)->nCell<1 || (*ppPage)->intKey!=pCur->curIntKey) + ){ + rc = SQLITE_CORRUPT_BKPT; + releasePage(*ppPage); + goto getAndInitPage_error; + } + return SQLITE_OK; + +getAndInitPage_error: + if( pCur ) pCur->iPage--; testcase( pgno==0 ); assert( pgno!=0 || rc==SQLITE_CORRUPT ); return rc; @@ -54876,18 +55784,49 @@ static int getAndInitPage( ** Release a MemPage. This should be called once for each prior ** call to btreeGetPage. */ +static void releasePageNotNull(MemPage *pPage){ + assert( pPage->aData ); + assert( pPage->pBt ); + assert( pPage->pDbPage!=0 ); + assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); + assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); + assert( sqlite3_mutex_held(pPage->pBt->mutex) ); + sqlite3PagerUnrefNotNull(pPage->pDbPage); +} static void releasePage(MemPage *pPage){ - if( pPage ){ - assert( pPage->aData ); - assert( pPage->pBt ); - assert( pPage->pDbPage!=0 ); - assert( sqlite3PagerGetExtra(pPage->pDbPage) == (void*)pPage ); - assert( sqlite3PagerGetData(pPage->pDbPage)==pPage->aData ); - assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - sqlite3PagerUnrefNotNull(pPage->pDbPage); + if( pPage ) releasePageNotNull(pPage); +} + +/* +** Get an unused page. +** +** This works just like btreeGetPage() with the addition: +** +** * If the page is already in use for some other purpose, immediately +** release it and return an SQLITE_CURRUPT error. +** * Make sure the isInit flag is clear +*/ +static int btreeGetUnusedPage( + BtShared *pBt, /* The btree */ + Pgno pgno, /* Number of the page to fetch */ + MemPage **ppPage, /* Return the page in this parameter */ + int flags /* PAGER_GET_NOCONTENT or PAGER_GET_READONLY */ +){ + int rc = btreeGetPage(pBt, pgno, ppPage, flags); + if( rc==SQLITE_OK ){ + if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ + releasePage(*ppPage); + *ppPage = 0; + return SQLITE_CORRUPT_BKPT; + } + (*ppPage)->isInit = 0; + }else{ + *ppPage = 0; } + return rc; } + /* ** During a rollback, when the pager reloads information into the cache ** so that the cache is restored to its original state at the start of @@ -55830,7 +56769,7 @@ static void unlockBtreeIfUnused(BtShared *pBt){ assert( pPage1->aData ); assert( sqlite3PagerRefcount(pBt->pPager)==1 ); pBt->pPage1 = 0; - releasePage(pPage1); + releasePageNotNull(pPage1); } } @@ -56135,15 +57074,17 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){ u8 isInitOrig = pPage->isInit; int i; int nCell; + int rc; - btreeInitPage(pPage); + rc = btreeInitPage(pPage); + if( rc ) return rc; nCell = pPage->nCell; for(i=0; i<nCell; i++){ u8 *pCell = findCell(pPage, i); if( eType==PTRMAP_OVERFLOW1 ){ CellInfo info; - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); if( info.iOverflow && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage && iFrom==get4byte(&pCell[info.iOverflow]) @@ -56442,7 +57383,7 @@ SQLITE_PRIVATE int sqlite3BtreeIncrVacuum(Btree *p){ static int autoVacuumCommit(BtShared *pBt){ int rc = SQLITE_OK; Pager *pPager = pBt->pPager; - VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager) ); + VVA_ONLY( int nRef = sqlite3PagerRefcount(pPager); ) assert( sqlite3_mutex_held(pBt->mutex) ); invalidateAllOverflowCache(pBt); @@ -56884,6 +57825,7 @@ static int btreeCursor( BtCursor *pCur /* Space for new cursor */ ){ BtShared *pBt = p->pBt; /* Shared b-tree handle */ + BtCursor *pX; /* Looping over other all cursors */ assert( sqlite3BtreeHoldsMutex(p) ); assert( wrFlag==0 || wrFlag==1 ); @@ -56899,10 +57841,8 @@ static int btreeCursor( assert( p->inTrans>TRANS_NONE ); assert( wrFlag==0 || p->inTrans==TRANS_WRITE ); assert( pBt->pPage1 && pBt->pPage1->aData ); + assert( wrFlag==0 || (pBt->btsFlags & BTS_READ_ONLY)==0 ); - if( NEVER(wrFlag && (pBt->btsFlags & BTS_READ_ONLY)!=0) ){ - return SQLITE_READONLY; - } if( wrFlag ){ allocateTempSpace(pBt); if( pBt->pTmpSpace==0 ) return SQLITE_NOMEM; @@ -56921,10 +57861,16 @@ static int btreeCursor( pCur->pBt = pBt; assert( wrFlag==0 || wrFlag==BTCF_WriteFlag ); pCur->curFlags = wrFlag; - pCur->pNext = pBt->pCursor; - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur; + pCur->curPagerFlags = wrFlag ? 0 : PAGER_GET_READONLY; + /* If there are two or more cursors on the same btree, then all such + ** cursors *must* have the BTCF_Multiple flag set. */ + for(pX=pBt->pCursor; pX; pX=pX->pNext){ + if( pX->pgnoRoot==(Pgno)iTable ){ + pX->curFlags |= BTCF_Multiple; + pCur->curFlags |= BTCF_Multiple; + } } + pCur->pNext = pBt->pCursor; pBt->pCursor = pCur; pCur->eState = CURSOR_INVALID; return SQLITE_OK; @@ -56937,9 +57883,13 @@ SQLITE_PRIVATE int sqlite3BtreeCursor( BtCursor *pCur /* Write new cursor here */ ){ int rc; - sqlite3BtreeEnter(p); - rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); - sqlite3BtreeLeave(p); + if( iTable<1 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + sqlite3BtreeEnter(p); + rc = btreeCursor(p, iTable, wrFlag, pKeyInfo, pCur); + sqlite3BtreeLeave(p); + } return rc; } @@ -56978,13 +57928,18 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ BtShared *pBt = pCur->pBt; sqlite3BtreeEnter(pBtree); sqlite3BtreeClearCursor(pCur); - if( pCur->pPrev ){ - pCur->pPrev->pNext = pCur->pNext; - }else{ + assert( pBt->pCursor!=0 ); + if( pBt->pCursor==pCur ){ pBt->pCursor = pCur->pNext; - } - if( pCur->pNext ){ - pCur->pNext->pPrev = pCur->pPrev; + }else{ + BtCursor *pPrev = pBt->pCursor; + do{ + if( pPrev->pNext==pCur ){ + pPrev->pNext = pCur->pNext; + break; + } + pPrev = pPrev->pNext; + }while( ALWAYS(pPrev) ); } for(i=0; i<=pCur->iPage; i++){ releasePage(pCur->apPage[i]); @@ -57004,13 +57959,6 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ ** ** BtCursor.info is a cache of the information in the current cell. ** Using this cache reduces the number of calls to btreeParseCell(). -** -** 2007-06-25: There is a bug in some versions of MSVC that cause the -** compiler to crash when getCellInfo() is implemented as a macro. -** But there is a measureable speed advantage to using the macro on gcc -** (when less compiler optimizations like -Os or -O0 are used and the -** compiler is not doing aggressive inlining.) So we use a real function -** for MSVC and a macro for everything else. Ticket #2457. */ #ifndef NDEBUG static void assertCellInfo(BtCursor *pCur){ @@ -57023,28 +57971,15 @@ SQLITE_PRIVATE int sqlite3BtreeCloseCursor(BtCursor *pCur){ #else #define assertCellInfo(x) #endif -#ifdef _MSC_VER - /* Use a real function in MSVC to work around bugs in that compiler. */ - static void getCellInfo(BtCursor *pCur){ - if( pCur->info.nSize==0 ){ - int iPage = pCur->iPage; - btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); - pCur->curFlags |= BTCF_ValidNKey; - }else{ - assertCellInfo(pCur); - } - } -#else /* if not _MSC_VER */ - /* Use a macro in all other compilers so that the function is inlined */ -#define getCellInfo(pCur) \ - if( pCur->info.nSize==0 ){ \ - int iPage = pCur->iPage; \ - btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); \ - pCur->curFlags |= BTCF_ValidNKey; \ - }else{ \ - assertCellInfo(pCur); \ +static SQLITE_NOINLINE void getCellInfo(BtCursor *pCur){ + if( pCur->info.nSize==0 ){ + int iPage = pCur->iPage; + pCur->curFlags |= BTCF_ValidNKey; + btreeParseCell(pCur->apPage[iPage],pCur->aiIdx[iPage],&pCur->info); + }else{ + assertCellInfo(pCur); } -#endif /* _MSC_VER */ +} #ifndef NDEBUG /* The next routine used only within assert() statements */ /* @@ -57550,9 +58485,6 @@ SQLITE_PRIVATE const void *sqlite3BtreeDataFetch(BtCursor *pCur, u32 *pAmt){ ** vice-versa). */ static int moveToChild(BtCursor *pCur, u32 newPgno){ - int rc; - int i = pCur->iPage; - MemPage *pNewPage; BtShared *pBt = pCur->pBt; assert( cursorHoldsMutex(pCur) ); @@ -57562,19 +58494,12 @@ static int moveToChild(BtCursor *pCur, u32 newPgno){ if( pCur->iPage>=(BTCURSOR_MAX_DEPTH-1) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, newPgno, &pNewPage, - (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); - if( rc ) return rc; - pCur->apPage[i+1] = pNewPage; - pCur->aiIdx[i+1] = 0; - pCur->iPage++; - pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); - if( pNewPage->nCell<1 || pNewPage->intKey!=pCur->apPage[i]->intKey ){ - return SQLITE_CORRUPT_BKPT; - } - return SQLITE_OK; + pCur->iPage++; + pCur->aiIdx[pCur->iPage] = 0; + return getAndInitPage(pBt, newPgno, &pCur->apPage[pCur->iPage], + pCur, pCur->curPagerFlags); } #if SQLITE_DEBUG @@ -57618,11 +58543,9 @@ static void moveToParent(BtCursor *pCur){ pCur->apPage[pCur->iPage]->pgno ); testcase( pCur->aiIdx[pCur->iPage-1] > pCur->apPage[pCur->iPage-1]->nCell ); - - releasePage(pCur->apPage[pCur->iPage]); - pCur->iPage--; pCur->info.nSize = 0; pCur->curFlags &= ~(BTCF_ValidNKey|BTCF_ValidOvfl); + releasePageNotNull(pCur->apPage[pCur->iPage--]); } /* @@ -57663,18 +58586,23 @@ static int moveToRoot(BtCursor *pCur){ } if( pCur->iPage>=0 ){ - while( pCur->iPage ) releasePage(pCur->apPage[pCur->iPage--]); + while( pCur->iPage ){ + assert( pCur->apPage[pCur->iPage]!=0 ); + releasePageNotNull(pCur->apPage[pCur->iPage--]); + } }else if( pCur->pgnoRoot==0 ){ pCur->eState = CURSOR_INVALID; return SQLITE_OK; }else{ + assert( pCur->iPage==(-1) ); rc = getAndInitPage(pCur->pBtree->pBt, pCur->pgnoRoot, &pCur->apPage[0], - (pCur->curFlags & BTCF_WriteFlag)==0 ? PAGER_GET_READONLY : 0); + 0, pCur->curPagerFlags); if( rc!=SQLITE_OK ){ pCur->eState = CURSOR_INVALID; return rc; } pCur->iPage = 0; + pCur->curIntKey = pCur->apPage[0]->intKey; } pRoot = pCur->apPage[0]; assert( pRoot->pgno==pCur->pgnoRoot ); @@ -57877,7 +58805,7 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( /* If the cursor is already positioned at the point we are trying ** to move to, then just return without doing any work */ if( pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0 - && pCur->apPage[0]->intKey + && pCur->curIntKey ){ if( pCur->info.nKey==intKey ){ *pRes = 0; @@ -57912,7 +58840,8 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 ); return SQLITE_OK; } - assert( pCur->apPage[0]->intKey || pIdxKey ); + assert( pCur->apPage[0]->intKey==pCur->curIntKey ); + assert( pCur->curIntKey || pIdxKey ); for(;;){ int lwr, upr, idx, c; Pgno chldPg; @@ -57935,7 +58864,7 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( if( xRecordCompare==0 ){ for(;;){ i64 nCellKey; - pCell = findCell(pPage, idx) + pPage->childPtrSize; + pCell = findCellPastPtr(pPage, idx); if( pPage->intKeyLeaf ){ while( 0x80 <= *(pCell++) ){ if( pCell>=pPage->aDataEnd ) return SQLITE_CORRUPT_BKPT; @@ -57967,8 +58896,8 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( } }else{ for(;;){ - int nCell; - pCell = findCell(pPage, idx) + pPage->childPtrSize; + int nCell; /* Size of the pCell cell in bytes */ + pCell = findCellPastPtr(pPage, idx); /* The maximum supported page-size is 65536 bytes. This means that ** the maximum number of record bytes stored on an index B-Tree @@ -57996,12 +58925,25 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked( /* The record flows over onto one or more overflow pages. In ** this case the whole cell needs to be parsed, a buffer allocated ** and accessPayload() used to retrieve the record into the - ** buffer before VdbeRecordCompare() can be called. */ + ** buffer before VdbeRecordCompare() can be called. + ** + ** If the record is corrupt, the xRecordCompare routine may read + ** up to two varints past the end of the buffer. An extra 18 + ** bytes of padding is allocated at the end of the buffer in + ** case this happens. */ void *pCellKey; u8 * const pCellBody = pCell - pPage->childPtrSize; - btreeParseCellPtr(pPage, pCellBody, &pCur->info); + pPage->xParseCell(pPage, pCellBody, &pCur->info); nCell = (int)pCur->info.nKey; - pCellKey = sqlite3Malloc( nCell ); + testcase( nCell<0 ); /* True if key size is 2^32 or more */ + testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */ + testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */ + testcase( nCell==2 ); /* Minimum legal index key size */ + if( nCell<2 ){ + rc = SQLITE_CORRUPT_BKPT; + goto moveto_finish; + } + pCellKey = sqlite3Malloc( nCell+18 ); if( pCellKey==0 ){ rc = SQLITE_NOMEM; goto moveto_finish; @@ -58294,8 +59236,7 @@ SQLITE_PRIVATE int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){ ** sqlite3PagerUnref() on the new page when it is done. ** ** SQLITE_OK is returned on success. Any other return value indicates -** an error. *ppPage and *pPgno are undefined in the event of an error. -** Do not invoke sqlite3PagerUnref() on *ppPage if an error is returned. +** an error. *ppPage is set to NULL in the event of an error. ** ** If the "nearby" parameter is not 0, then an effort is made to ** locate a page close to the page number "nearby". This can be used in an @@ -58338,6 +59279,7 @@ static int allocateBtreePage( /* There are pages on the freelist. Reuse one of those pages. */ Pgno iTrunk; u8 searchList = 0; /* If the free-list must be searched for 'nearby' */ + u32 nSearch = 0; /* Count of the number of search attempts */ /* If eMode==BTALLOC_EXACT and a query of the pointer-map ** shows that the page 'nearby' is somewhere on the free-list, then @@ -58386,10 +59328,10 @@ static int allocateBtreePage( iTrunk = get4byte(&pPage1->aData[32]); } testcase( iTrunk==mxPage ); - if( iTrunk>mxPage ){ + if( iTrunk>mxPage || nSearch++ > n ){ rc = SQLITE_CORRUPT_BKPT; }else{ - rc = btreeGetPage(pBt, iTrunk, &pTrunk, 0); + rc = btreeGetUnusedPage(pBt, iTrunk, &pTrunk, 0); } if( rc ){ pTrunk = 0; @@ -58454,7 +59396,7 @@ static int allocateBtreePage( goto end_allocate_page; } testcase( iNewTrunk==mxPage ); - rc = btreeGetPage(pBt, iNewTrunk, &pNewTrunk, 0); + rc = btreeGetUnusedPage(pBt, iNewTrunk, &pNewTrunk, 0); if( rc!=SQLITE_OK ){ goto end_allocate_page; } @@ -58534,11 +59476,12 @@ static int allocateBtreePage( } put4byte(&aData[4], k-1); noContent = !btreeGetHasContent(pBt, *pPgno)? PAGER_GET_NOCONTENT : 0; - rc = btreeGetPage(pBt, *pPgno, ppPage, noContent); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, noContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); + *ppPage = 0; } } searchList = 0; @@ -58582,7 +59525,7 @@ static int allocateBtreePage( MemPage *pPg = 0; TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", pBt->nPage)); assert( pBt->nPage!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, pBt->nPage, &pPg, bNoContent); + rc = btreeGetUnusedPage(pBt, pBt->nPage, &pPg, bNoContent); if( rc==SQLITE_OK ){ rc = sqlite3PagerWrite(pPg->pDbPage); releasePage(pPg); @@ -58596,11 +59539,12 @@ static int allocateBtreePage( *pPgno = pBt->nPage; assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); - rc = btreeGetPage(pBt, *pPgno, ppPage, bNoContent); + rc = btreeGetUnusedPage(pBt, *pPgno, ppPage, bNoContent); if( rc ) return rc; rc = sqlite3PagerWrite((*ppPage)->pDbPage); if( rc!=SQLITE_OK ){ releasePage(*ppPage); + *ppPage = 0; } TRACE(("ALLOCATE: %d from end of file\n", *pPgno)); } @@ -58610,17 +59554,8 @@ static int allocateBtreePage( end_allocate_page: releasePage(pTrunk); releasePage(pPrevTrunk); - if( rc==SQLITE_OK ){ - if( sqlite3PagerPageRefcount((*ppPage)->pDbPage)>1 ){ - releasePage(*ppPage); - *ppPage = 0; - return SQLITE_CORRUPT_BKPT; - } - (*ppPage)->isInit = 0; - }else{ - *ppPage = 0; - } - assert( rc!=SQLITE_OK || sqlite3PagerIswriteable((*ppPage)->pDbPage) ); + assert( rc!=SQLITE_OK || sqlite3PagerPageRefcount((*ppPage)->pDbPage)<=1 ); + assert( rc!=SQLITE_OK || (*ppPage)->isInit==0 ); return rc; } @@ -58645,9 +59580,10 @@ static int freePage2(BtShared *pBt, MemPage *pMemPage, Pgno iPage){ int nFree; /* Initial number of pages on free-list */ assert( sqlite3_mutex_held(pBt->mutex) ); - assert( iPage>1 ); + assert( CORRUPT_DB || iPage>1 ); assert( !pMemPage || pMemPage->pgno==iPage ); + if( iPage<2 ) return SQLITE_CORRUPT_BKPT; if( pMemPage ){ pPage = pMemPage; sqlite3PagerRef(pPage->pDbPage); @@ -58787,7 +59723,7 @@ static int clearCell( u32 ovflPageSize; assert( sqlite3_mutex_held(pPage->pBt->mutex) ); - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); *pnSize = info.nSize; if( info.iOverflow==0 ){ return SQLITE_OK; /* No overflow pages. Return without doing anything */ @@ -58799,7 +59735,9 @@ static int clearCell( assert( pBt->usableSize > 4 ); ovflPageSize = pBt->usableSize - 4; nOvfl = (info.nPayload - info.nLocal + ovflPageSize - 1)/ovflPageSize; - assert( ovflPgno==0 || nOvfl>0 ); + assert( nOvfl>0 || + (CORRUPT_DB && (info.nPayload + ovflPageSize)<ovflPageSize) + ); while( nOvfl-- ){ Pgno iNext = 0; MemPage *pOvfl = 0; @@ -58897,9 +59835,7 @@ static int fillInCell( nSrc = nData; nData = 0; }else{ - if( NEVER(nKey>0x7fffffff || pKey==0) ){ - return SQLITE_CORRUPT_BKPT; - } + assert( nKey<=0x7fffffff && pKey!=0 ); nPayload = (int)nKey; pSrc = pKey; nSrc = (int)nKey; @@ -58939,7 +59875,7 @@ static int fillInCell( #if SQLITE_DEBUG { CellInfo info; - btreeParseCellPtr(pPage, pCell, &info); + pPage->xParseCell(pPage, pCell, &info); assert( nHeader=(int)(info.pPayload - pCell) ); assert( info.nKey==nKey ); assert( *pnSize == info.nSize ); @@ -59054,7 +59990,7 @@ static void dropCell(MemPage *pPage, int idx, int sz, int *pRC){ if( *pRC ) return; assert( idx>=0 && idx<pPage->nCell ); - assert( sz==cellSize(pPage, idx) ); + assert( CORRUPT_DB || sz==cellSize(pPage, idx) ); assert( sqlite3PagerIswriteable(pPage->pDbPage) ); assert( sqlite3_mutex_held(pPage->pBt->mutex) ); data = pPage->aData; @@ -59109,10 +60045,8 @@ static void insertCell( ){ int idx = 0; /* Where to write new cell content in data[] */ int j; /* Loop counter */ - int end; /* First byte past the last cell pointer in data[] */ - int ins; /* Index in data[] where new cell pointer is inserted */ - int cellOffset; /* Address of first cell pointer in data[] */ u8 *data; /* The content of the whole page */ + u8 *pIns; /* The point in pPage->aCellIdx[] where no cell inserted */ if( *pRC ) return; @@ -59127,7 +60061,7 @@ static void insertCell( ** wanted to be less than 4 but got rounded up to 4 on the leaf, then size ** might be less than 8 (leaf-size + pointer) on the interior node. Hence ** the term after the || in the following assert(). */ - assert( sz==cellSizePtr(pPage, pCell) || (sz==8 && iChild>0) ); + assert( sz==pPage->xCellSize(pPage, pCell) || (sz==8 && iChild>0) ); if( pPage->nOverflow || sz+2>pPage->nFree ){ if( pTemp ){ memcpy(pTemp, pCell, sz); @@ -59140,6 +60074,14 @@ static void insertCell( assert( j<(int)(sizeof(pPage->apOvfl)/sizeof(pPage->apOvfl[0])) ); pPage->apOvfl[j] = pCell; pPage->aiOvfl[j] = (u16)i; + + /* When multiple overflows occur, they are always sequential and in + ** sorted order. This invariants arise because multiple overflows can + ** only occur when inserting divider cells into the parent page during + ** balancing, and the dividers are adjacent and sorted. + */ + assert( j==0 || pPage->aiOvfl[j-1]<(u16)i ); /* Overflows in sorted order */ + assert( j==0 || i==pPage->aiOvfl[j-1]+1 ); /* Overflows are sequential */ }else{ int rc = sqlite3PagerWrite(pPage->pDbPage); if( rc!=SQLITE_OK ){ @@ -59148,24 +60090,26 @@ static void insertCell( } assert( sqlite3PagerIswriteable(pPage->pDbPage) ); data = pPage->aData; - cellOffset = pPage->cellOffset; - end = cellOffset + 2*pPage->nCell; - ins = cellOffset + 2*i; + assert( &data[pPage->cellOffset]==pPage->aCellIdx ); rc = allocateSpace(pPage, sz, &idx); if( rc ){ *pRC = rc; return; } - /* The allocateSpace() routine guarantees the following two properties - ** if it returns success */ - assert( idx >= end+2 ); + /* The allocateSpace() routine guarantees the following properties + ** if it returns successfully */ + assert( idx >= 0 ); + assert( idx >= pPage->cellOffset+2*pPage->nCell+2 || CORRUPT_DB ); assert( idx+sz <= (int)pPage->pBt->usableSize ); - pPage->nCell++; pPage->nFree -= (u16)(2 + sz); memcpy(&data[idx], pCell, sz); if( iChild ){ put4byte(&data[idx], iChild); } - memmove(&data[ins+2], &data[ins], end-ins); - put2byte(&data[ins], idx); - put2byte(&data[pPage->hdrOffset+3], pPage->nCell); + pIns = pPage->aCellIdx + i*2; + memmove(pIns+2, pIns, 2*(pPage->nCell - i)); + put2byte(pIns, idx); + pPage->nCell++; + /* increment the cell count */ + if( (++data[pPage->hdrOffset+4])==0 ) data[pPage->hdrOffset+3]++; + assert( get2byte(&data[pPage->hdrOffset+3])==pPage->nCell ); #ifndef SQLITE_OMIT_AUTOVACUUM if( pPage->pBt->autoVacuum ){ /* The cell may contain a pointer to an overflow page. If so, write @@ -59177,6 +60121,52 @@ static void insertCell( } } +/* +** A CellArray object contains a cache of pointers and sizes for a +** consecutive sequence of cells that might be held multiple pages. +*/ +typedef struct CellArray CellArray; +struct CellArray { + int nCell; /* Number of cells in apCell[] */ + MemPage *pRef; /* Reference page */ + u8 **apCell; /* All cells begin balanced */ + u16 *szCell; /* Local size of all cells in apCell[] */ +}; + +/* +** Make sure the cell sizes at idx, idx+1, ..., idx+N-1 have been +** computed. +*/ +static void populateCellCache(CellArray *p, int idx, int N){ + assert( idx>=0 && idx+N<=p->nCell ); + while( N>0 ){ + assert( p->apCell[idx]!=0 ); + if( p->szCell[idx]==0 ){ + p->szCell[idx] = p->pRef->xCellSize(p->pRef, p->apCell[idx]); + }else{ + assert( CORRUPT_DB || + p->szCell[idx]==p->pRef->xCellSize(p->pRef, p->apCell[idx]) ); + } + idx++; + N--; + } +} + +/* +** Return the size of the Nth element of the cell array +*/ +static SQLITE_NOINLINE u16 computeCellSize(CellArray *p, int N){ + assert( N>=0 && N<p->nCell ); + assert( p->szCell[N]==0 ); + p->szCell[N] = p->pRef->xCellSize(p->pRef, p->apCell[N]); + return p->szCell[N]; +} +static u16 cachedCellSize(CellArray *p, int N){ + assert( N>=0 && N<p->nCell ); + if( p->szCell[N] ) return p->szCell[N]; + return computeCellSize(p, N); +} + /* ** Array apCell[] contains pointers to nCell b-tree page cells. The ** szCell[] array contains the size in bytes of each cell. This function @@ -59190,7 +60180,7 @@ static void insertCell( ** The MemPage.nFree field is invalidated by this function. It is the ** responsibility of the caller to set it correctly. */ -static void rebuildPage( +static int rebuildPage( MemPage *pPg, /* Edit this page */ int nCell, /* Final number of cells on page */ u8 **apCell, /* Array of cells */ @@ -59215,10 +60205,12 @@ static void rebuildPage( pCell = &pTmp[pCell - aData]; } pData -= szCell[i]; - memcpy(pData, pCell, szCell[i]); put2byte(pCellptr, (pData - aData)); pCellptr += 2; - assert( szCell[i]==cellSizePtr(pPg, pCell) ); + if( pData < pCellptr ) return SQLITE_CORRUPT_BKPT; + memcpy(pData, pCell, szCell[i]); + assert( szCell[i]==pPg->xCellSize(pPg, pCell) || CORRUPT_DB ); + testcase( szCell[i]!=pPg->xCellSize(pPg,pCell) ); } /* The pPg->nFree field is now set incorrectly. The caller will fix it. */ @@ -59229,6 +60221,7 @@ static void rebuildPage( put2byte(&aData[hdr+3], pPg->nCell); put2byte(&aData[hdr+5], pData - aData); aData[hdr+7] = 0x00; + return SQLITE_OK; } /* @@ -59261,25 +60254,25 @@ static int pageInsertArray( u8 *pBegin, /* End of cell-pointer array */ u8 **ppData, /* IN/OUT: Page content -area pointer */ u8 *pCellptr, /* Pointer to cell-pointer area */ + int iFirst, /* Index of first cell to add */ int nCell, /* Number of cells to add to pPg */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells */ ){ int i; u8 *aData = pPg->aData; u8 *pData = *ppData; - const int bFreelist = aData[1] || aData[2]; + int iEnd = iFirst + nCell; assert( CORRUPT_DB || pPg->hdrOffset==0 ); /* Never called on page 1 */ - for(i=0; i<nCell; i++){ - int sz = szCell[i]; - int rc; + for(i=iFirst; i<iEnd; i++){ + int sz, rc; u8 *pSlot; - if( bFreelist==0 || (pSlot = pageFindSlot(pPg, sz, &rc, 0))==0 ){ + sz = cachedCellSize(pCArray, i); + if( (aData[1]==0 && aData[2]==0) || (pSlot = pageFindSlot(pPg,sz,&rc))==0 ){ pData -= sz; if( pData<pBegin ) return 1; pSlot = pData; } - memcpy(pSlot, apCell[i], sz); + memcpy(pSlot, pCArray->apCell[i], sz); put2byte(pCellptr, (pSlot - aData)); pCellptr += 2; } @@ -59298,22 +60291,27 @@ static int pageInsertArray( */ static int pageFreeArray( MemPage *pPg, /* Page to edit */ + int iFirst, /* First cell to delete */ int nCell, /* Cells to delete */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells */ ){ u8 * const aData = pPg->aData; u8 * const pEnd = &aData[pPg->pBt->usableSize]; u8 * const pStart = &aData[pPg->hdrOffset + 8 + pPg->childPtrSize]; int nRet = 0; int i; + int iEnd = iFirst + nCell; u8 *pFree = 0; int szFree = 0; - for(i=0; i<nCell; i++){ - u8 *pCell = apCell[i]; + for(i=iFirst; i<iEnd; i++){ + u8 *pCell = pCArray->apCell[i]; if( pCell>=pStart && pCell<pEnd ){ - int sz = szCell[i]; + int sz; + /* No need to use cachedCellSize() here. The sizes of all cells that + ** are to be freed have already been computing while deciding which + ** cells need freeing */ + sz = pCArray->szCell[i]; assert( sz>0 ); if( pFree!=(pCell + sz) ){ if( pFree ){ assert( pFree>aData && (pFree - aData)<65536 ); @@ -59348,13 +60346,12 @@ static int pageFreeArray( ** The pPg->nFree field is invalid when this function returns. It is the ** responsibility of the caller to set it correctly. */ -static void editPage( +static int editPage( MemPage *pPg, /* Edit this page */ int iOld, /* Index of first cell currently on page */ int iNew, /* Index of new first cell on page */ int nNew, /* Final number of cells on page */ - u8 **apCell, /* Array of cells */ - u16 *szCell /* Array of cell sizes */ + CellArray *pCArray /* Array of cells and sizes */ ){ u8 * const aData = pPg->aData; const int hdr = pPg->hdrOffset; @@ -59373,16 +60370,12 @@ static void editPage( /* Remove cells from the start and end of the page */ if( iOld<iNew ){ - int nShift = pageFreeArray( - pPg, iNew-iOld, &apCell[iOld], &szCell[iOld] - ); + int nShift = pageFreeArray(pPg, iOld, iNew-iOld, pCArray); memmove(pPg->aCellIdx, &pPg->aCellIdx[nShift*2], nCell*2); nCell -= nShift; } if( iNewEnd < iOldEnd ){ - nCell -= pageFreeArray( - pPg, iOldEnd-iNewEnd, &apCell[iNewEnd], &szCell[iNewEnd] - ); + nCell -= pageFreeArray(pPg, iNewEnd, iOldEnd - iNewEnd, pCArray); } pData = &aData[get2byteNotZero(&aData[hdr+5])]; @@ -59396,7 +60389,7 @@ static void editPage( memmove(&pCellptr[nAdd*2], pCellptr, nCell*2); if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nAdd, &apCell[iNew], &szCell[iNew] + iNew, nAdd, pCArray ) ) goto editpage_fail; nCell += nAdd; } @@ -59410,7 +60403,7 @@ static void editPage( nCell++; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - 1, &apCell[iCell + iNew], &szCell[iCell + iNew] + iCell+iNew, 1, pCArray ) ) goto editpage_fail; } } @@ -59419,7 +60412,7 @@ static void editPage( pCellptr = &pPg->aCellIdx[nCell*2]; if( pageInsertArray( pPg, pBegin, &pData, pCellptr, - nNew-nCell, &apCell[iNew+nCell], &szCell[iNew+nCell] + iNew+nCell, nNew-nCell, pCArray ) ) goto editpage_fail; pPg->nCell = nNew; @@ -59430,19 +60423,21 @@ static void editPage( #ifdef SQLITE_DEBUG for(i=0; i<nNew && !CORRUPT_DB; i++){ - u8 *pCell = apCell[i+iNew]; - int iOff = get2byte(&pPg->aCellIdx[i*2]); + u8 *pCell = pCArray->apCell[i+iNew]; + int iOff = get2byteAligned(&pPg->aCellIdx[i*2]); if( pCell>=aData && pCell<&aData[pPg->pBt->usableSize] ){ pCell = &pTmp[pCell - aData]; } - assert( 0==memcmp(pCell, &aData[iOff], szCell[i+iNew]) ); + assert( 0==memcmp(pCell, &aData[iOff], + pCArray->pRef->xCellSize(pCArray->pRef, pCArray->apCell[i+iNew])) ); } #endif - return; + return SQLITE_OK; editpage_fail: /* Unable to edit this page. Rebuild it from scratch instead. */ - rebuildPage(pPg, nNew, &apCell[iNew], &szCell[iNew]); + populateCellCache(pCArray, iNew, nNew); + return rebuildPage(pPg, nNew, &pCArray->apCell[iNew], &pCArray->szCell[iNew]); } /* @@ -59508,13 +60503,14 @@ static int balance_quick(MemPage *pParent, MemPage *pPage, u8 *pSpace){ u8 *pOut = &pSpace[4]; u8 *pCell = pPage->apOvfl[0]; - u16 szCell = cellSizePtr(pPage, pCell); + u16 szCell = pPage->xCellSize(pPage, pCell); u8 *pStop; assert( sqlite3PagerIswriteable(pNew->pDbPage) ); assert( pPage->aData[0]==(PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF) ); zeroPage(pNew, PTF_INTKEY|PTF_LEAFDATA|PTF_LEAF); - rebuildPage(pNew, 1, &pCell, &szCell); + rc = rebuildPage(pNew, 1, &pCell, &szCell); + if( NEVER(rc) ) return rc; pNew->nFree = pBt->usableSize - pNew->cellOffset - 2 - szCell; /* If this is an auto-vacuum database, update the pointer map @@ -59587,7 +60583,7 @@ static int ptrmapCheckPages(MemPage **apPage, int nPage){ u8 *z; z = findCell(pPage, j); - btreeParseCellPtr(pPage, z, &info); + pPage->xParseCell(pPage, z, &info); if( info.iOverflow ){ Pgno ovfl = get4byte(&z[info.iOverflow]); ptrmapGet(pBt, ovfl, &e, &n); @@ -59718,7 +60714,6 @@ static int balance_nonroot( int bBulk /* True if this call is part of a bulk load */ ){ BtShared *pBt; /* The whole database */ - int nCell = 0; /* Number of cells in apCell[] */ int nMaxCells = 0; /* Allocated size of apCell, szCell, aFrom. */ int nNew = 0; /* Number of pages in apNew[] */ int nOld; /* Number of pages in apOld[] */ @@ -59729,7 +60724,6 @@ static int balance_nonroot( int leafData; /* True if pPage is a leaf of a LEAFDATA tree */ int usableSpace; /* Bytes in pPage beyond the header */ int pageFlags; /* Value of pPage->aData[0] */ - int subtotal; /* Subtotal of bytes in cells on one page */ int iSpace1 = 0; /* First unused byte of aSpace1[] */ int iOvflSpace = 0; /* First unused byte of aOvflSpace[] */ int szScratch; /* Size of scratch memory requested */ @@ -59737,19 +60731,20 @@ static int balance_nonroot( MemPage *apNew[NB+2]; /* pPage and up to NB siblings after balancing */ u8 *pRight; /* Location in parent of right-sibling pointer */ u8 *apDiv[NB-1]; /* Divider cells in pParent */ - int cntNew[NB+2]; /* Index in aCell[] of cell after i-th page */ - int cntOld[NB+2]; /* Old index in aCell[] after i-th page */ + int cntNew[NB+2]; /* Index in b.paCell[] of cell after i-th page */ + int cntOld[NB+2]; /* Old index in b.apCell[] */ int szNew[NB+2]; /* Combined size of cells placed on i-th page */ - u8 **apCell = 0; /* All cells begin balanced */ - u16 *szCell; /* Local size of all cells in apCell[] */ u8 *aSpace1; /* Space for copies of dividers cells */ Pgno pgno; /* Temp var to store a page number in */ u8 abDone[NB+2]; /* True after i'th new page is populated */ Pgno aPgno[NB+2]; /* Page numbers of new pages before shuffling */ Pgno aPgOrder[NB+2]; /* Copy of aPgno[] used for sorting pages */ u16 aPgFlags[NB+2]; /* flags field of new pages before shuffling */ + CellArray b; /* Parsed information on cells being balanced */ memset(abDone, 0, sizeof(abDone)); + b.nCell = 0; + b.apCell = 0; pBt = pParent->pBt; assert( sqlite3_mutex_held(pBt->mutex) ); assert( sqlite3PagerIswriteable(pParent->pDbPage) ); @@ -59803,7 +60798,7 @@ static int balance_nonroot( } pgno = get4byte(pRight); while( 1 ){ - rc = getAndInitPage(pBt, pgno, &apOld[i], 0); + rc = getAndInitPage(pBt, pgno, &apOld[i], 0, 0); if( rc ){ memset(apOld, 0, (i+1)*sizeof(MemPage*)); goto balance_cleanup; @@ -59814,12 +60809,12 @@ static int balance_nonroot( if( i+nxDiv==pParent->aiOvfl[0] && pParent->nOverflow ){ apDiv[i] = pParent->apOvfl[0]; pgno = get4byte(apDiv[i]); - szNew[i] = cellSizePtr(pParent, apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); pParent->nOverflow = 0; }else{ apDiv[i] = findCell(pParent, i+nxDiv-pParent->nOverflow); pgno = get4byte(apDiv[i]); - szNew[i] = cellSizePtr(pParent, apDiv[i]); + szNew[i] = pParent->xCellSize(pParent, apDiv[i]); /* Drop the cell from the parent page. apDiv[i] still points to ** the cell within the parent, even though it has been dropped. @@ -59858,130 +60853,201 @@ static int balance_nonroot( ** Allocate space for memory structures */ szScratch = - nMaxCells*sizeof(u8*) /* apCell */ - + nMaxCells*sizeof(u16) /* szCell */ + nMaxCells*sizeof(u8*) /* b.apCell */ + + nMaxCells*sizeof(u16) /* b.szCell */ + pBt->pageSize; /* aSpace1 */ /* EVIDENCE-OF: R-28375-38319 SQLite will never request a scratch buffer ** that is more than 6 times the database page size. */ assert( szScratch<=6*(int)pBt->pageSize ); - apCell = sqlite3ScratchMalloc( szScratch ); - if( apCell==0 ){ + b.apCell = sqlite3ScratchMalloc( szScratch ); + if( b.apCell==0 ){ rc = SQLITE_NOMEM; goto balance_cleanup; } - szCell = (u16*)&apCell[nMaxCells]; - aSpace1 = (u8*)&szCell[nMaxCells]; + b.szCell = (u16*)&b.apCell[nMaxCells]; + aSpace1 = (u8*)&b.szCell[nMaxCells]; assert( EIGHT_BYTE_ALIGNMENT(aSpace1) ); /* ** Load pointers to all cells on sibling pages and the divider cells - ** into the local apCell[] array. Make copies of the divider cells + ** into the local b.apCell[] array. Make copies of the divider cells ** into space obtained from aSpace1[]. The divider cells have already ** been removed from pParent. ** ** If the siblings are on leaf pages, then the child pointers of the ** divider cells are stripped from the cells before they are copied - ** into aSpace1[]. In this way, all cells in apCell[] are without + ** into aSpace1[]. In this way, all cells in b.apCell[] are without ** child pointers. If siblings are not leaves, then all cell in - ** apCell[] include child pointers. Either way, all cells in apCell[] + ** b.apCell[] include child pointers. Either way, all cells in b.apCell[] ** are alike. ** ** leafCorrection: 4 if pPage is a leaf. 0 if pPage is not a leaf. ** leafData: 1 if pPage holds key+data and pParent holds only keys. */ - leafCorrection = apOld[0]->leaf*4; - leafData = apOld[0]->intKeyLeaf; + b.pRef = apOld[0]; + leafCorrection = b.pRef->leaf*4; + leafData = b.pRef->intKeyLeaf; for(i=0; i<nOld; i++){ - int limit; MemPage *pOld = apOld[i]; + int limit = pOld->nCell; + u8 *aData = pOld->aData; + u16 maskPage = pOld->maskPage; + u8 *piCell = aData + pOld->cellOffset; + u8 *piEnd; + + /* Verify that all sibling pages are of the same "type" (table-leaf, + ** table-interior, index-leaf, or index-interior). + */ + if( pOld->aData[0]!=apOld[0]->aData[0] ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } - limit = pOld->nCell+pOld->nOverflow; + /* Load b.apCell[] with pointers to all cells in pOld. If pOld + ** constains overflow cells, include them in the b.apCell[] array + ** in the correct spot. + ** + ** Note that when there are multiple overflow cells, it is always the + ** case that they are sequential and adjacent. This invariant arises + ** because multiple overflows can only occurs when inserting divider + ** cells into a parent on a prior balance, and divider cells are always + ** adjacent and are inserted in order. There is an assert() tagged + ** with "NOTE 1" in the overflow cell insertion loop to prove this + ** invariant. + ** + ** This must be done in advance. Once the balance starts, the cell + ** offset section of the btree page will be overwritten and we will no + ** long be able to find the cells if a pointer to each cell is not saved + ** first. + */ + memset(&b.szCell[b.nCell], 0, sizeof(b.szCell[0])*limit); if( pOld->nOverflow>0 ){ + memset(&b.szCell[b.nCell+limit], 0, sizeof(b.szCell[0])*pOld->nOverflow); + limit = pOld->aiOvfl[0]; for(j=0; j<limit; j++){ - assert( nCell<nMaxCells ); - apCell[nCell] = findOverflowCell(pOld, j); - szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); - nCell++; + b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell)); + piCell += 2; + b.nCell++; } - }else{ - u8 *aData = pOld->aData; - u16 maskPage = pOld->maskPage; - u16 cellOffset = pOld->cellOffset; - for(j=0; j<limit; j++){ - assert( nCell<nMaxCells ); - apCell[nCell] = findCellv2(aData, maskPage, cellOffset, j); - szCell[nCell] = cellSizePtr(pOld, apCell[nCell]); - nCell++; + for(k=0; k<pOld->nOverflow; k++){ + assert( k==0 || pOld->aiOvfl[k-1]+1==pOld->aiOvfl[k] );/* NOTE 1 */ + b.apCell[b.nCell] = pOld->apOvfl[k]; + b.nCell++; } - } - cntOld[i] = nCell; + } + piEnd = aData + pOld->cellOffset + 2*pOld->nCell; + while( piCell<piEnd ){ + assert( b.nCell<nMaxCells ); + b.apCell[b.nCell] = aData + (maskPage & get2byteAligned(piCell)); + piCell += 2; + b.nCell++; + } + + cntOld[i] = b.nCell; if( i<nOld-1 && !leafData){ u16 sz = (u16)szNew[i]; u8 *pTemp; - assert( nCell<nMaxCells ); - szCell[nCell] = sz; + assert( b.nCell<nMaxCells ); + b.szCell[b.nCell] = sz; pTemp = &aSpace1[iSpace1]; iSpace1 += sz; assert( sz<=pBt->maxLocal+23 ); assert( iSpace1 <= (int)pBt->pageSize ); memcpy(pTemp, apDiv[i], sz); - apCell[nCell] = pTemp+leafCorrection; + b.apCell[b.nCell] = pTemp+leafCorrection; assert( leafCorrection==0 || leafCorrection==4 ); - szCell[nCell] = szCell[nCell] - leafCorrection; + b.szCell[b.nCell] = b.szCell[b.nCell] - leafCorrection; if( !pOld->leaf ){ assert( leafCorrection==0 ); assert( pOld->hdrOffset==0 ); /* The right pointer of the child page pOld becomes the left ** pointer of the divider cell */ - memcpy(apCell[nCell], &pOld->aData[8], 4); + memcpy(b.apCell[b.nCell], &pOld->aData[8], 4); }else{ assert( leafCorrection==4 ); - if( szCell[nCell]<4 ){ + while( b.szCell[b.nCell]<4 ){ /* Do not allow any cells smaller than 4 bytes. If a smaller cell ** does exist, pad it with 0x00 bytes. */ - assert( szCell[nCell]==3 ); - assert( apCell[nCell]==&aSpace1[iSpace1-3] ); + assert( b.szCell[b.nCell]==3 || CORRUPT_DB ); + assert( b.apCell[b.nCell]==&aSpace1[iSpace1-3] || CORRUPT_DB ); aSpace1[iSpace1++] = 0x00; - szCell[nCell] = 4; + b.szCell[b.nCell]++; } } - nCell++; + b.nCell++; } } /* - ** Figure out the number of pages needed to hold all nCell cells. + ** Figure out the number of pages needed to hold all b.nCell cells. ** Store this number in "k". Also compute szNew[] which is the total ** size of all cells on the i-th page and cntNew[] which is the index - ** in apCell[] of the cell that divides page i from page i+1. - ** cntNew[k] should equal nCell. + ** in b.apCell[] of the cell that divides page i from page i+1. + ** cntNew[k] should equal b.nCell. ** ** Values computed by this block: ** ** k: The total number of sibling pages ** szNew[i]: Spaced used on the i-th sibling page. - ** cntNew[i]: Index in apCell[] and szCell[] for the first cell to + ** cntNew[i]: Index in b.apCell[] and b.szCell[] for the first cell to ** the right of the i-th sibling page. ** usableSpace: Number of bytes of space available on each sibling. ** */ usableSpace = pBt->usableSize - 12 + leafCorrection; - for(subtotal=k=i=0; i<nCell; i++){ - assert( i<nMaxCells ); - subtotal += szCell[i] + 2; - if( subtotal > usableSpace ){ - szNew[k] = subtotal - szCell[i] - 2; - cntNew[k] = i; - if( leafData ){ i--; } - subtotal = 0; - k++; - if( k>NB+1 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } - } - } - szNew[k] = subtotal; - cntNew[k] = nCell; - k++; + for(i=0; i<nOld; i++){ + MemPage *p = apOld[i]; + szNew[i] = usableSpace - p->nFree; + if( szNew[i]<0 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + for(j=0; j<p->nOverflow; j++){ + szNew[i] += 2 + p->xCellSize(p, p->apOvfl[j]); + } + cntNew[i] = cntOld[i]; + } + k = nOld; + for(i=0; i<k; i++){ + int sz; + while( szNew[i]>usableSpace ){ + if( i+1>=k ){ + k = i+2; + if( k>NB+2 ){ rc = SQLITE_CORRUPT_BKPT; goto balance_cleanup; } + szNew[k-1] = 0; + cntNew[k-1] = b.nCell; + } + sz = 2 + cachedCellSize(&b, cntNew[i]-1); + szNew[i] -= sz; + if( !leafData ){ + if( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + }else{ + sz = 0; + } + } + szNew[i+1] += sz; + cntNew[i]--; + } + while( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + if( szNew[i]+sz>usableSpace ) break; + szNew[i] += sz; + cntNew[i]++; + if( !leafData ){ + if( cntNew[i]<b.nCell ){ + sz = 2 + cachedCellSize(&b, cntNew[i]); + }else{ + sz = 0; + } + } + szNew[i+1] -= sz; + } + if( cntNew[i]>=b.nCell ){ + k = i+1; + }else if( cntNew[i] <= (i>0 ? cntNew[i-1] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } + } /* ** The packing computed by the previous block is biased toward the siblings @@ -60002,19 +61068,27 @@ static int balance_nonroot( r = cntNew[i-1] - 1; d = r + 1 - leafData; - assert( d<nMaxCells ); - assert( r<nMaxCells ); - while( szRight==0 - || (!bBulk && szRight+szCell[d]+2<=szLeft-(szCell[r]+2)) - ){ - szRight += szCell[d] + 2; - szLeft -= szCell[r] + 2; - cntNew[i-1]--; - r = cntNew[i-1] - 1; - d = r + 1 - leafData; - } + (void)cachedCellSize(&b, d); + do{ + assert( d<nMaxCells ); + assert( r<nMaxCells ); + (void)cachedCellSize(&b, r); + if( szRight!=0 + && (bBulk || szRight+b.szCell[d]+2 > szLeft-(b.szCell[r]+2)) ){ + break; + } + szRight += b.szCell[d] + 2; + szLeft -= b.szCell[r] + 2; + cntNew[i-1] = r; + r--; + d--; + }while( r>=0 ); szNew[i] = szRight; szNew[i-1] = szLeft; + if( cntNew[i-1] <= (i>1 ? cntNew[i-2] : 0) ){ + rc = SQLITE_CORRUPT_BKPT; + goto balance_cleanup; + } } /* Sanity check: For a non-corrupt database file one of the follwing @@ -60034,10 +61108,6 @@ static int balance_nonroot( /* ** Allocate k new pages. Reuse old pages where possible. */ - if( apOld[0]->pgno<=1 ){ - rc = SQLITE_CORRUPT_BKPT; - goto balance_cleanup; - } pageFlags = apOld[0]->aData[0]; for(i=0; i<k; i++){ MemPage *pNew; @@ -60054,7 +61124,7 @@ static int balance_nonroot( zeroPage(pNew, pageFlags); apNew[i] = pNew; nNew++; - cntOld[i] = nCell; + cntOld[i] = b.nCell; /* Set the pointer-map entry for the new sibling page. */ if( ISAUTOVACUUM ){ @@ -60159,8 +61229,8 @@ static int balance_nonroot( int iNew = 0; int iOld = 0; - for(i=0; i<nCell; i++){ - u8 *pCell = apCell[i]; + for(i=0; i<b.nCell; i++){ + u8 *pCell = b.apCell[i]; if( i==cntOldNext ){ MemPage *pOld = (++iOld)<nNew ? apNew[iOld] : apOld[iOld]; cntOldNext += pOld->nCell + pOld->nOverflow + !leafData; @@ -60185,9 +61255,10 @@ static int balance_nonroot( if( !leafCorrection ){ ptrmapPut(pBt, get4byte(pCell), PTRMAP_BTREE, pNew->pgno, &rc); } - if( szCell[i]>pNew->minLocal ){ + if( cachedCellSize(&b,i)>pNew->minLocal ){ ptrmapPutOvflPtr(pNew, pCell, &rc); } + if( rc ) goto balance_cleanup; } } } @@ -60201,20 +61272,21 @@ static int balance_nonroot( j = cntNew[i]; assert( j<nMaxCells ); - pCell = apCell[j]; - sz = szCell[j] + leafCorrection; + assert( b.apCell[j]!=0 ); + pCell = b.apCell[j]; + sz = b.szCell[j] + leafCorrection; pTemp = &aOvflSpace[iOvflSpace]; if( !pNew->leaf ){ memcpy(&pNew->aData[8], pCell, 4); }else if( leafData ){ /* If the tree is a leaf-data tree, and the siblings are leaves, - ** then there is no divider cell in apCell[]. Instead, the divider + ** then there is no divider cell in b.apCell[]. Instead, the divider ** cell consists of the integer key for the right-most cell of ** the sibling-page assembled above only. */ CellInfo info; j--; - btreeParseCellPtr(pNew, apCell[j], &info); + pNew->xParseCell(pNew, b.apCell[j], &info); pCell = pTemp; sz = 4 + putVarint(&pCell[4], info.nKey); pTemp = 0; @@ -60231,9 +61303,9 @@ static int balance_nonroot( ** cells are at least 4 bytes. It only happens in b-trees used ** to evaluate "IN (SELECT ...)" and similar clauses. */ - if( szCell[j]==4 ){ + if( b.szCell[j]==4 ){ assert(leafCorrection==4); - sz = cellSizePtr(pParent, pCell); + sz = pParent->xCellSize(pParent, pCell); } } iOvflSpace += sz; @@ -60289,12 +61361,13 @@ static int balance_nonroot( iNew = iOld = 0; nNewCell = cntNew[0]; }else{ - iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : nCell; + iOld = iPg<nOld ? (cntOld[iPg-1] + !leafData) : b.nCell; iNew = cntNew[iPg-1] + !leafData; nNewCell = cntNew[iPg] - iNew; } - editPage(apNew[iPg], iOld, iNew, nNewCell, apCell, szCell); + rc = editPage(apNew[iPg], iOld, iNew, nNewCell, &b); + if( rc ) goto balance_cleanup; abDone[iPg]++; apNew[iPg]->nFree = usableSpace-szNew[iPg]; assert( apNew[iPg]->nOverflow==0 ); @@ -60345,7 +61418,7 @@ static int balance_nonroot( assert( pParent->isInit ); TRACE(("BALANCE: finished: old=%d new=%d cells=%d\n", - nOld, nNew, nCell)); + nOld, nNew, b.nCell)); /* Free any old pages that were not reused as new pages. */ @@ -60368,7 +61441,7 @@ static int balance_nonroot( ** Cleanup before returning. */ balance_cleanup: - sqlite3ScratchFree(apCell); + sqlite3ScratchFree(b.apCell); for(i=0; i<nOld; i++){ releasePage(apOld[i]); } @@ -60643,24 +61716,28 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( ** doing any work. To avoid thwarting these optimizations, it is important ** not to clear the cursor here. */ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } if( pCur->pKeyInfo==0 ){ + assert( pKey==0 ); /* If this is an insert into a table b-tree, invalidate any incrblob ** cursors open on the row being replaced */ invalidateIncrblobCursors(p, nKey, 0); /* If the cursor is currently on the last row and we are appending a - ** new row onto the end, set the "loc" to avoid an unnecessary btreeMoveto() - ** call */ + ** new row onto the end, set the "loc" to avoid an unnecessary + ** btreeMoveto() call */ if( (pCur->curFlags&BTCF_ValidNKey)!=0 && nKey>0 && pCur->info.nKey==nKey-1 ){ - loc = -1; + loc = -1; + }else if( loc==0 ){ + rc = sqlite3BtreeMovetoUnpacked(pCur, 0, nKey, appendBias, &loc); + if( rc ) return rc; } - } - - if( !loc ){ + }else if( loc==0 ){ rc = btreeMoveto(pCur, pKey, nKey, appendBias, &loc); if( rc ) return rc; } @@ -60678,7 +61755,7 @@ SQLITE_PRIVATE int sqlite3BtreeInsert( assert( newCell!=0 ); rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, nZero, &szNew); if( rc ) goto end_insert; - assert( szNew==cellSizePtr(pPage, newCell) ); + assert( szNew==pPage->xCellSize(pPage, newCell) ); assert( szNew <= MX_CELL_SIZE(pBt) ); idx = pCur->aiIdx[pCur->iPage]; if( loc==0 ){ @@ -60762,12 +61839,8 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ assert( pCur->curFlags & BTCF_WriteFlag ); assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) ); assert( !hasReadConflicts(p, pCur->pgnoRoot) ); - - if( NEVER(pCur->aiIdx[pCur->iPage]>=pCur->apPage[pCur->iPage]->nCell) - || NEVER(pCur->eState!=CURSOR_VALID) - ){ - return SQLITE_ERROR; /* Something has gone awry. */ - } + assert( pCur->aiIdx[pCur->iPage]<pCur->apPage[pCur->iPage]->nCell ); + assert( pCur->eState==CURSOR_VALID ); iCellDepth = pCur->iPage; iCellIdx = pCur->aiIdx[iCellDepth]; @@ -60792,8 +61865,10 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ ** deleted writable. Then free any overflow pages associated with the ** entry and finally remove the cell itself from within the page. */ - rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); - if( rc ) return rc; + if( pCur->curFlags & BTCF_Multiple ){ + rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur); + if( rc ) return rc; + } /* If this is a delete operation to remove a row from a table b-tree, ** invalidate any incrblob cursors open on the row being deleted. */ @@ -60819,7 +61894,8 @@ SQLITE_PRIVATE int sqlite3BtreeDelete(BtCursor *pCur){ unsigned char *pTmp; pCell = findCell(pLeaf, pLeaf->nCell-1); - nCell = cellSizePtr(pLeaf, pCell); + if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT; + nCell = pLeaf->xCellSize(pLeaf, pCell); assert( MX_CELL_SIZE(pBt) >= nCell ); pTmp = pBt->pTmpSpace; assert( pTmp!=0 ); @@ -60911,7 +61987,8 @@ static int btreeCreateTable(Btree *p, int *piTable, int createTabFlags){ pgnoRoot==PENDING_BYTE_PAGE(pBt) ){ pgnoRoot++; } - assert( pgnoRoot>=3 ); + assert( pgnoRoot>=3 || CORRUPT_DB ); + testcase( pgnoRoot<3 ); /* Allocate a page. The page that currently resides at pgnoRoot will ** be moved to the allocated page (unless the allocated page happens @@ -61040,7 +62117,7 @@ static int clearDatabasePage( if( pgno>btreePagecount(pBt) ){ return SQLITE_CORRUPT_BKPT; } - rc = getAndInitPage(pBt, pgno, &pPage, 0); + rc = getAndInitPage(pBt, pgno, &pPage, 0, 0); if( rc ) return rc; if( pPage->bBusy ){ rc = SQLITE_CORRUPT_BKPT; @@ -61061,7 +62138,8 @@ static int clearDatabasePage( rc = clearDatabasePage(pBt, get4byte(&pPage->aData[hdr+8]), 1, pnChange); if( rc ) goto cleardatabasepage_out; }else if( pnChange ){ - assert( pPage->intKey ); + assert( pPage->intKey || CORRUPT_DB ); + testcase( !pPage->intKey ); *pnChange += pPage->nCell; } if( freePageFlag ){ @@ -61640,35 +62718,42 @@ static int btreeHeapPull(u32 *aHeap, u32 *pOut){ ** ** 1. Make sure that cells and freeblocks do not overlap ** but combine to completely cover the page. -** NO 2. Make sure cell keys are in order. -** NO 3. Make sure no key is less than or equal to zLowerBound. -** NO 4. Make sure no key is greater than or equal to zUpperBound. -** 5. Check the integrity of overflow pages. -** 6. Recursively call checkTreePage on all children. -** 7. Verify that the depth of all children is the same. -** 8. Make sure this page is at least 33% full or else it is -** the root of the tree. +** 2. Make sure integer cell keys are in order. +** 3. Check the integrity of overflow pages. +** 4. Recursively call checkTreePage on all children. +** 5. Verify that the depth of all children is the same. */ static int checkTreePage( IntegrityCk *pCheck, /* Context for the sanity check */ int iPage, /* Page number of the page to check */ - i64 *pnParentMinKey, - i64 *pnParentMaxKey + i64 *piMinKey, /* Write minimum integer primary key here */ + i64 maxKey /* Error if integer primary key greater than this */ ){ - MemPage *pPage; - int i, rc, depth, d2, pgno, cnt; - int hdr, cellStart; - int nCell; - u8 *data; - BtShared *pBt; - int usableSize; - u32 *heap = 0; - u32 x, prev = 0; - i64 nMinKey = 0; - i64 nMaxKey = 0; + MemPage *pPage = 0; /* The page being analyzed */ + int i; /* Loop counter */ + int rc; /* Result code from subroutine call */ + int depth = -1, d2; /* Depth of a subtree */ + int pgno; /* Page number */ + int nFrag; /* Number of fragmented bytes on the page */ + int hdr; /* Offset to the page header */ + int cellStart; /* Offset to the start of the cell pointer array */ + int nCell; /* Number of cells */ + int doCoverageCheck = 1; /* True if cell coverage checking should be done */ + int keyCanBeEqual = 1; /* True if IPK can be equal to maxKey + ** False if IPK must be strictly less than maxKey */ + u8 *data; /* Page content */ + u8 *pCell; /* Cell content */ + u8 *pCellIdx; /* Next element of the cell pointer array */ + BtShared *pBt; /* The BtShared object that owns pPage */ + u32 pc; /* Address of a cell */ + u32 usableSize; /* Usable size of the page */ + u32 contentOffset; /* Offset to the start of the cell content area */ + u32 *heap = 0; /* Min-heap used for checking cell coverage */ + u32 x, prev = 0; /* Next and previous entry on the min-heap */ const char *saved_zPfx = pCheck->zPfx; int saved_v1 = pCheck->v1; int saved_v2 = pCheck->v2; + u8 savedIsInit = 0; /* Check that the page exists */ @@ -61681,54 +62766,95 @@ static int checkTreePage( if( (rc = btreeGetPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){ checkAppendMsg(pCheck, "unable to get the page. error code=%d", rc); - depth = -1; goto end_of_check; } /* Clear MemPage.isInit to make sure the corruption detection code in ** btreeInitPage() is executed. */ + savedIsInit = pPage->isInit; pPage->isInit = 0; if( (rc = btreeInitPage(pPage))!=0 ){ assert( rc==SQLITE_CORRUPT ); /* The only possible error from InitPage */ checkAppendMsg(pCheck, "btreeInitPage() returns error code %d", rc); - releasePage(pPage); - depth = -1; goto end_of_check; } + data = pPage->aData; + hdr = pPage->hdrOffset; + + /* Set up for cell analysis */ + pCheck->zPfx = "On tree page %d cell %d: "; + contentOffset = get2byteNotZero(&data[hdr+5]); + assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ + + /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the + ** number of cells on the page. */ + nCell = get2byte(&data[hdr+3]); + assert( pPage->nCell==nCell ); + + /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page + ** immediately follows the b-tree page header. */ + cellStart = hdr + 12 - 4*pPage->leaf; + assert( pPage->aCellIdx==&data[cellStart] ); + pCellIdx = &data[cellStart + 2*(nCell-1)]; + + if( !pPage->leaf ){ + /* Analyze the right-child page of internal pages */ + pgno = get4byte(&data[hdr+8]); +#ifndef SQLITE_OMIT_AUTOVACUUM + if( pBt->autoVacuum ){ + pCheck->zPfx = "On page %d at right child: "; + checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); + } +#endif + depth = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + }else{ + /* For leaf pages, the coverage check will occur in the same loop + ** as the other cell checks, so initialize the heap. */ + heap = pCheck->heap; + heap[0] = 0; + } - /* Check out all the cells. - */ - depth = 0; - for(i=0; i<pPage->nCell && pCheck->mxErr; i++){ - u8 *pCell; - u32 sz; + /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte + ** integer offsets to the cell contents. */ + for(i=nCell-1; i>=0 && pCheck->mxErr; i--){ CellInfo info; - /* Check payload overflow pages - */ - pCheck->zPfx = "On tree page %d cell %d: "; - pCheck->v1 = iPage; + /* Check cell size */ pCheck->v2 = i; - pCell = findCell(pPage,i); - btreeParseCellPtr(pPage, pCell, &info); - sz = info.nPayload; - /* For intKey pages, check that the keys are in order. - */ + assert( pCellIdx==&data[cellStart + i*2] ); + pc = get2byteAligned(pCellIdx); + pCellIdx -= 2; + if( pc<contentOffset || pc>usableSize-4 ){ + checkAppendMsg(pCheck, "Offset %d out of range %d..%d", + pc, contentOffset, usableSize-4); + doCoverageCheck = 0; + continue; + } + pCell = &data[pc]; + pPage->xParseCell(pPage, pCell, &info); + if( pc+info.nSize>usableSize ){ + checkAppendMsg(pCheck, "Extends off end of page"); + doCoverageCheck = 0; + continue; + } + + /* Check for integer primary key out of range */ if( pPage->intKey ){ - if( i==0 ){ - nMinKey = nMaxKey = info.nKey; - }else if( info.nKey <= nMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (previous was %lld)", info.nKey, nMaxKey); + if( keyCanBeEqual ? (info.nKey > maxKey) : (info.nKey >= maxKey) ){ + checkAppendMsg(pCheck, "Rowid %lld out of order", info.nKey); } - nMaxKey = info.nKey; + maxKey = info.nKey; } - if( (sz>info.nLocal) - && (&pCell[info.iOverflow]<=&pPage->aData[pBt->usableSize]) - ){ - int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4); - Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]); + + /* Check the content overflow list */ + if( info.nPayload>info.nLocal ){ + int nPage; /* Number of pages on the overflow chain */ + Pgno pgnoOvfl; /* First page of the overflow chain */ + assert( pc + info.iOverflow <= usableSize ); + nPage = (info.nPayload - info.nLocal + usableSize - 5)/(usableSize - 4); + pgnoOvfl = get4byte(&pCell[info.iOverflow]); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage); @@ -61737,118 +62863,57 @@ static int checkTreePage( checkList(pCheck, 0, pgnoOvfl, nPage); } - /* Check sanity of left child page. - */ if( !pPage->leaf ){ + /* Check sanity of left child page for internal pages */ pgno = get4byte(pCell); #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum ){ checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); } #endif - d2 = checkTreePage(pCheck, pgno, &nMinKey, i==0?NULL:&nMaxKey); - if( i>0 && d2!=depth ){ + d2 = checkTreePage(pCheck, pgno, &maxKey, maxKey); + keyCanBeEqual = 0; + if( d2!=depth ){ checkAppendMsg(pCheck, "Child page depth differs"); + depth = d2; } - depth = d2; - } - } - - if( !pPage->leaf ){ - pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]); - pCheck->zPfx = "On page %d at right child: "; - pCheck->v1 = iPage; -#ifndef SQLITE_OMIT_AUTOVACUUM - if( pBt->autoVacuum ){ - checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage); - } -#endif - checkTreePage(pCheck, pgno, NULL, !pPage->nCell?NULL:&nMaxKey); - } - - /* For intKey leaf pages, check that the min/max keys are in order - ** with any left/parent/right pages. - */ - pCheck->zPfx = "Page %d: "; - pCheck->v1 = iPage; - if( pPage->leaf && pPage->intKey ){ - /* if we are a left child page */ - if( pnParentMinKey ){ - /* if we are the left most child page */ - if( !pnParentMaxKey ){ - if( nMaxKey > *pnParentMinKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (max larger than parent min of %lld)", - nMaxKey, *pnParentMinKey); - } - }else{ - if( nMinKey <= *pnParentMinKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (min less than parent min of %lld)", - nMinKey, *pnParentMinKey); - } - if( nMaxKey > *pnParentMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (max larger than parent max of %lld)", - nMaxKey, *pnParentMaxKey); - } - *pnParentMinKey = nMaxKey; - } - /* else if we're a right child page */ - } else if( pnParentMaxKey ){ - if( nMinKey <= *pnParentMaxKey ){ - checkAppendMsg(pCheck, - "Rowid %lld out of order (min less than parent max of %lld)", - nMinKey, *pnParentMaxKey); - } + }else{ + /* Populate the coverage-checking heap for leaf pages */ + btreeHeapInsert(heap, (pc<<16)|(pc+info.nSize-1)); } } + *piMinKey = maxKey; /* Check for complete coverage of the page */ - data = pPage->aData; - hdr = pPage->hdrOffset; - heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); pCheck->zPfx = 0; - if( heap==0 ){ - pCheck->mallocFailed = 1; - }else{ - int contentOffset = get2byteNotZero(&data[hdr+5]); - assert( contentOffset<=usableSize ); /* Enforced by btreeInitPage() */ - heap[0] = 0; - btreeHeapInsert(heap, contentOffset-1); - /* EVIDENCE-OF: R-37002-32774 The two-byte integer at offset 3 gives the - ** number of cells on the page. */ - nCell = get2byte(&data[hdr+3]); - /* EVIDENCE-OF: R-23882-45353 The cell pointer array of a b-tree page - ** immediately follows the b-tree page header. */ - cellStart = hdr + 12 - 4*pPage->leaf; - /* EVIDENCE-OF: R-02776-14802 The cell pointer array consists of K 2-byte - ** integer offsets to the cell contents. */ - for(i=0; i<nCell; i++){ - int pc = get2byte(&data[cellStart+i*2]); - u32 size = 65536; - if( pc<=usableSize-4 ){ - size = cellSizePtr(pPage, &data[pc]); - } - if( (int)(pc+size-1)>=usableSize ){ - pCheck->zPfx = 0; - checkAppendMsg(pCheck, - "Corruption detected in cell %d on page %d",i,iPage); - }else{ + if( doCoverageCheck && pCheck->mxErr>0 ){ + /* For leaf pages, the min-heap has already been initialized and the + ** cells have already been inserted. But for internal pages, that has + ** not yet been done, so do it now */ + if( !pPage->leaf ){ + heap = pCheck->heap; + heap[0] = 0; + for(i=nCell-1; i>=0; i--){ + u32 size; + pc = get2byteAligned(&data[cellStart+i*2]); + size = pPage->xCellSize(pPage, &data[pc]); btreeHeapInsert(heap, (pc<<16)|(pc+size-1)); } } - /* EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header + /* Add the freeblocks to the min-heap + ** + ** EVIDENCE-OF: R-20690-50594 The second field of the b-tree page header ** is the offset of the first freeblock, or zero if there are no - ** freeblocks on the page. */ + ** freeblocks on the page. + */ i = get2byte(&data[hdr+1]); while( i>0 ){ int size, j; - assert( i<=usableSize-4 ); /* Enforced by btreeInitPage() */ + assert( (u32)i<=usableSize-4 ); /* Enforced by btreeInitPage() */ size = get2byte(&data[i+2]); - assert( i+size<=usableSize ); /* Enforced by btreeInitPage() */ - btreeHeapInsert(heap, (i<<16)|(i+size-1)); + assert( (u32)(i+size)<=usableSize ); /* Enforced by btreeInitPage() */ + btreeHeapInsert(heap, (((u32)i)<<16)|(i+size-1)); /* EVIDENCE-OF: R-58208-19414 The first 2 bytes of a freeblock are a ** big-endian integer which is the offset in the b-tree page of the next ** freeblock in the chain, or zero if the freeblock is the last on the @@ -61857,39 +62922,50 @@ static int checkTreePage( /* EVIDENCE-OF: R-06866-39125 Freeblocks are always connected in order of ** increasing offset. */ assert( j==0 || j>i+size ); /* Enforced by btreeInitPage() */ - assert( j<=usableSize-4 ); /* Enforced by btreeInitPage() */ + assert( (u32)j<=usableSize-4 ); /* Enforced by btreeInitPage() */ i = j; } - cnt = 0; - assert( heap[0]>0 ); - assert( (heap[1]>>16)==0 ); - btreeHeapPull(heap,&prev); + /* Analyze the min-heap looking for overlap between cells and/or + ** freeblocks, and counting the number of untracked bytes in nFrag. + ** + ** Each min-heap entry is of the form: (start_address<<16)|end_address. + ** There is an implied first entry the covers the page header, the cell + ** pointer index, and the gap between the cell pointer index and the start + ** of cell content. + ** + ** The loop below pulls entries from the min-heap in order and compares + ** the start_address against the previous end_address. If there is an + ** overlap, that means bytes are used multiple times. If there is a gap, + ** that gap is added to the fragmentation count. + */ + nFrag = 0; + prev = contentOffset - 1; /* Implied first min-heap entry */ while( btreeHeapPull(heap,&x) ){ - if( (prev&0xffff)+1>(x>>16) ){ + if( (prev&0xffff)>=(x>>16) ){ checkAppendMsg(pCheck, "Multiple uses for byte %u of page %d", x>>16, iPage); break; }else{ - cnt += (x>>16) - (prev&0xffff) - 1; + nFrag += (x>>16) - (prev&0xffff) - 1; prev = x; } } - cnt += usableSize - (prev&0xffff) - 1; + nFrag += usableSize - (prev&0xffff) - 1; /* EVIDENCE-OF: R-43263-13491 The total number of bytes in all fragments ** is stored in the fifth field of the b-tree page header. ** EVIDENCE-OF: R-07161-27322 The one-byte integer at offset 7 gives the ** number of fragmented free bytes within the cell content area. */ - if( heap[0]==0 && cnt!=data[hdr+7] ){ + if( heap[0]==0 && nFrag!=data[hdr+7] ){ checkAppendMsg(pCheck, "Fragmentation of %d bytes reported as %d on page %d", - cnt, data[hdr+7], iPage); + nFrag, data[hdr+7], iPage); } } - sqlite3PageFree(heap); - releasePage(pPage); end_of_check: + if( !doCoverageCheck ) pPage->isInit = savedIsInit; + releasePage(pPage); pCheck->zPfx = saved_zPfx; pCheck->v1 = saved_v1; pCheck->v2 = saved_v2; @@ -61919,14 +62995,15 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( int *pnErr /* Write number of errors seen to this variable */ ){ Pgno i; - int nRef; IntegrityCk sCheck; BtShared *pBt = p->pBt; + int savedDbFlags = pBt->db->flags; char zErr[100]; + VVA_ONLY( int nRef ); sqlite3BtreeEnter(p); assert( p->inTrans>TRANS_NONE && pBt->inTransaction>TRANS_NONE ); - nRef = sqlite3PagerRefcount(pBt->pPager); + assert( (nRef = sqlite3PagerRefcount(pBt->pPager))>=0 ); sCheck.pBt = pBt; sCheck.pPager = pBt->pPager; sCheck.nPage = btreePagecount(sCheck.pBt); @@ -61936,21 +63013,26 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( sCheck.zPfx = 0; sCheck.v1 = 0; sCheck.v2 = 0; - *pnErr = 0; + sCheck.aPgRef = 0; + sCheck.heap = 0; + sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); if( sCheck.nPage==0 ){ - sqlite3BtreeLeave(p); - return 0; + goto integrity_ck_cleanup; } sCheck.aPgRef = sqlite3MallocZero((sCheck.nPage / 8)+ 1); if( !sCheck.aPgRef ){ - *pnErr = 1; - sqlite3BtreeLeave(p); - return 0; + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; } + sCheck.heap = (u32*)sqlite3PageMalloc( pBt->pageSize ); + if( sCheck.heap==0 ){ + sCheck.mallocFailed = 1; + goto integrity_ck_cleanup; + } + i = PENDING_BYTE_PAGE(pBt); if( i<=sCheck.nPage ) setPageReferenced(&sCheck, i); - sqlite3StrAccumInit(&sCheck.errMsg, 0, zErr, sizeof(zErr), SQLITE_MAX_LENGTH); /* Check the integrity of the freelist */ @@ -61961,17 +63043,19 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( /* Check all the tables. */ + testcase( pBt->db->flags & SQLITE_CellSizeCk ); + pBt->db->flags &= ~SQLITE_CellSizeCk; for(i=0; (int)i<nRoot && sCheck.mxErr; i++){ + i64 notUsed; if( aRoot[i]==0 ) continue; #ifndef SQLITE_OMIT_AUTOVACUUM if( pBt->autoVacuum && aRoot[i]>1 ){ checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0); } #endif - sCheck.zPfx = "List of tree roots: "; - checkTreePage(&sCheck, aRoot[i], NULL, NULL); - sCheck.zPfx = 0; + checkTreePage(&sCheck, aRoot[i], ¬Used, LARGEST_INT64); } + pBt->db->flags = savedDbFlags; /* Make sure every page in the file is referenced */ @@ -61995,28 +63079,20 @@ SQLITE_PRIVATE char *sqlite3BtreeIntegrityCheck( #endif } - /* Make sure this analysis did not leave any unref() pages. - ** This is an internal consistency check; an integrity check - ** of the integrity check. - */ - if( NEVER(nRef != sqlite3PagerRefcount(pBt->pPager)) ){ - checkAppendMsg(&sCheck, - "Outstanding page count goes from %d to %d during this analysis", - nRef, sqlite3PagerRefcount(pBt->pPager) - ); - } - /* Clean up and report errors. */ - sqlite3BtreeLeave(p); +integrity_ck_cleanup: + sqlite3PageFree(sCheck.heap); sqlite3_free(sCheck.aPgRef); if( sCheck.mallocFailed ){ sqlite3StrAccumReset(&sCheck.errMsg); - *pnErr = sCheck.nErr+1; - return 0; + sCheck.nErr++; } *pnErr = sCheck.nErr; if( sCheck.nErr==0 ) sqlite3StrAccumReset(&sCheck.errMsg); + /* Make sure this analysis did not leave any unref() pages. */ + assert( nRef==sqlite3PagerRefcount(pBt->pPager) ); + sqlite3BtreeLeave(p); return sqlite3StrAccumFinish(&sCheck.errMsg); } #endif /* SQLITE_OMIT_INTEGRITY_CHECK */ @@ -62227,6 +63303,7 @@ SQLITE_PRIVATE int sqlite3BtreePutData(BtCursor *pCsr, u32 offset, u32 amt, void */ SQLITE_PRIVATE void sqlite3BtreeIncrblobCursor(BtCursor *pCur){ pCur->curFlags |= BTCF_Incrblob; + pCur->pBtree->hasIncrblobCur = 1; } #endif @@ -62312,6 +63389,8 @@ SQLITE_PRIVATE int sqlite3HeaderSizeBtree(void){ return ROUND8(sizeof(MemPage)); ** This file contains the implementation of the sqlite3_backup_XXX() ** API functions and the related features. */ +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ /* ** Structure allocated for each backup operation. @@ -62983,9 +64062,13 @@ SQLITE_API int SQLITE_STDCALL sqlite3_backup_pagecount(sqlite3_backup *p){ ** corresponding to the source database is held when this function is ** called. */ -SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ - sqlite3_backup *p; /* Iterator variable */ - for(p=pBackup; p; p=p->pNext){ +static SQLITE_NOINLINE void backupUpdate( + sqlite3_backup *p, + Pgno iPage, + const u8 *aData +){ + assert( p!=0 ); + do{ assert( sqlite3_mutex_held(p->pSrc->pBt->mutex) ); if( !isFatalError(p->rc) && iPage<p->iNext ){ /* The backup process p has already copied page iPage. But now it @@ -63002,7 +64085,10 @@ SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, con p->rc = rc; } } - } + }while( (p = p->pNext)!=0 ); +} +SQLITE_PRIVATE void sqlite3BackupUpdate(sqlite3_backup *pBackup, Pgno iPage, const u8 *aData){ + if( pBackup ) backupUpdate(pBackup, iPage, aData); } /* @@ -63103,6 +64189,8 @@ copy_finished: ** only within the VDBE. Interface routines refer to a Mem using the ** name sqlite_value */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifdef SQLITE_DEBUG /* @@ -63674,7 +64762,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem *pMem){ SQLITE_PRIVATE void sqlite3VdbeMemCast(Mem *pMem, u8 aff, u8 encoding){ if( pMem->flags & MEM_Null ) return; switch( aff ){ - case SQLITE_AFF_NONE: { /* Really a cast to BLOB */ + case SQLITE_AFF_BLOB: { /* Really a cast to BLOB */ if( (pMem->flags & MEM_Blob)==0 ){ sqlite3ValueApplyAffinity(pMem, SQLITE_AFF_TEXT, encoding); assert( pMem->flags & MEM_Str || pMem->db->mallocFailed ); @@ -63856,10 +64944,6 @@ SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){ } #endif /* SQLITE_DEBUG */ -/* -** Size of struct Mem not including the Mem.zMalloc member. -*/ -#define MEMCELLSIZE offsetof(Mem,zMalloc) /* ** Make an shallow copy of pFrom into pTo. Prior contents of @@ -63867,10 +64951,15 @@ SQLITE_PRIVATE void sqlite3VdbeMemAboutToChange(Vdbe *pVdbe, Mem *pMem){ ** pFrom->z is used, then pTo->z points to the same thing as pFrom->z ** and flags gets srcType (either MEM_Ephem or MEM_Static). */ +static SQLITE_NOINLINE void vdbeClrCopy(Mem *pTo, const Mem *pFrom, int eType){ + vdbeMemClearExternAndSetNull(pTo); + assert( !VdbeMemDynamic(pTo) ); + sqlite3VdbeMemShallowCopy(pTo, pFrom, eType); +} SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){ assert( (pFrom->flags & MEM_RowSet)==0 ); assert( pTo->db==pFrom->db ); - if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo); + if( VdbeMemDynamic(pTo) ){ vdbeClrCopy(pTo,pFrom,srcType); return; } memcpy(pTo, pFrom, MEMCELLSIZE); if( (pFrom->flags&MEM_Static)==0 ){ pTo->flags &= ~(MEM_Dyn|MEM_Static|MEM_Ephem); @@ -63886,7 +64975,10 @@ SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int sr SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){ int rc = SQLITE_OK; - assert( pTo->db==pFrom->db ); + /* The pFrom==0 case in the following assert() is when an sqlite3_value + ** from sqlite3_value_dup() is used as the argument + ** to sqlite3_result_value(). */ + assert( pTo->db==pFrom->db || pFrom->db==0 ); assert( (pFrom->flags & MEM_RowSet)==0 ); if( VdbeMemDynamic(pTo) ) vdbeMemClearExternAndSetNull(pTo); memcpy(pTo, pFrom, MEMCELLSIZE); @@ -64033,6 +65125,32 @@ SQLITE_PRIVATE int sqlite3VdbeMemSetStr( ** If this routine fails for any reason (malloc returns NULL or unable ** to read from the disk) then the pMem is left in an inconsistent state. */ +static SQLITE_NOINLINE int vdbeMemFromBtreeResize( + BtCursor *pCur, /* Cursor pointing at record to retrieve. */ + u32 offset, /* Offset from the start of data to return bytes from. */ + u32 amt, /* Number of bytes to return. */ + int key, /* If true, retrieve from the btree key, not data. */ + Mem *pMem /* OUT: Return data in this Mem structure. */ +){ + int rc; + pMem->flags = MEM_Null; + if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ + if( key ){ + rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); + }else{ + rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); + } + if( rc==SQLITE_OK ){ + pMem->z[amt] = 0; + pMem->z[amt+1] = 0; + pMem->flags = MEM_Blob|MEM_Term; + pMem->n = (int)amt; + }else{ + sqlite3VdbeMemRelease(pMem); + } + } + return rc; +} SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( BtCursor *pCur, /* Cursor pointing at record to retrieve. */ u32 offset, /* Offset from the start of data to return bytes from. */ @@ -64062,22 +65180,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemFromBtree( pMem->flags = MEM_Blob|MEM_Ephem; pMem->n = (int)amt; }else{ - pMem->flags = MEM_Null; - if( SQLITE_OK==(rc = sqlite3VdbeMemClearAndResize(pMem, amt+2)) ){ - if( key ){ - rc = sqlite3BtreeKey(pCur, offset, amt, pMem->z); - }else{ - rc = sqlite3BtreeData(pCur, offset, amt, pMem->z); - } - if( rc==SQLITE_OK ){ - pMem->z[amt] = 0; - pMem->z[amt+1] = 0; - pMem->flags = MEM_Blob|MEM_Term; - pMem->n = (int)amt; - }else{ - sqlite3VdbeMemRelease(pMem); - } - } + rc = vdbeMemFromBtreeResize(pCur, offset, amt, key, pMem); } return rc; @@ -64398,7 +65501,7 @@ static int valueFromExpr( if( zVal==0 ) goto no_mem; sqlite3ValueSetStr(pVal, -1, zVal, SQLITE_UTF8, SQLITE_DYNAMIC); } - if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_NONE ){ + if( (op==TK_INTEGER || op==TK_FLOAT ) && affinity==SQLITE_AFF_BLOB ){ sqlite3ValueApplyAffinity(pVal, SQLITE_AFF_NUMERIC, SQLITE_UTF8); }else{ sqlite3ValueApplyAffinity(pVal, affinity, SQLITE_UTF8); @@ -64765,19 +65868,28 @@ SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value *v){ } /* -** Return the number of bytes in the sqlite3_value object assuming -** that it uses the encoding "enc" +** The sqlite3ValueBytes() routine returns the number of bytes in the +** sqlite3_value object assuming that it uses the encoding "enc". +** The valueBytes() routine is a helper function. */ +static SQLITE_NOINLINE int valueBytes(sqlite3_value *pVal, u8 enc){ + return valueToText(pVal, enc)!=0 ? pVal->n : 0; +} SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ Mem *p = (Mem*)pVal; - if( (p->flags & MEM_Blob)!=0 || sqlite3ValueText(pVal, enc) ){ + assert( (p->flags & MEM_Null)==0 || (p->flags & (MEM_Str|MEM_Blob))==0 ); + if( (p->flags & MEM_Str)!=0 && pVal->enc==enc ){ + return p->n; + } + if( (p->flags & MEM_Blob)!=0 ){ if( p->flags & MEM_Zero ){ return p->n + p->u.nZero; }else{ return p->n; } } - return 0; + if( p->flags & MEM_Null ) return 0; + return valueBytes(pVal, enc); } /************** End of vdbemem.c *********************************************/ @@ -64796,6 +65908,8 @@ SQLITE_PRIVATE int sqlite3ValueBytes(sqlite3_value *pVal, u8 enc){ ** This file contains code used for creating, destroying, and populating ** a VDBE (or an "sqlite3_stmt" as it is known to the outside world.) */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** Create a new virtual database engine. @@ -64820,6 +65934,17 @@ SQLITE_PRIVATE Vdbe *sqlite3VdbeCreate(Parse *pParse){ return p; } +/* +** Change the error string stored in Vdbe.zErrMsg +*/ +SQLITE_PRIVATE void sqlite3VdbeError(Vdbe *p, const char *zFormat, ...){ + va_list ap; + sqlite3DbFree(p->db, p->zErrMsg); + va_start(ap, zFormat); + p->zErrMsg = sqlite3VMPrintf(p->db, zFormat, ap); + va_end(ap); +} + /* ** Remember the SQL string for a prepared statement. */ @@ -65004,6 +66129,23 @@ SQLITE_PRIVATE int sqlite3VdbeAddOp4( return addr; } +/* +** Add an opcode that includes the p4 value with a P4_INT64 type. +*/ +SQLITE_PRIVATE int sqlite3VdbeAddOp4Dup8( + Vdbe *p, /* Add the opcode to this VM */ + int op, /* The new opcode */ + int p1, /* The P1 operand */ + int p2, /* The P2 operand */ + int p3, /* The P3 operand */ + const u8 *zP4, /* The P4 operand */ + int p4type /* P4 operand type */ +){ + char *p4copy = sqlite3DbMallocRaw(sqlite3VdbeDb(p), 8); + if( p4copy ) memcpy(p4copy, zP4, 8); + return sqlite3VdbeAddOp4(p, op, p1, p2, p3, p4copy, p4type); +} + /* ** Add an OP_ParseSchema opcode. This routine is broken out from ** sqlite3VdbeAddOp4() since it needs to also needs to mark all btrees @@ -65168,6 +66310,7 @@ static Op *opIterNext(VdbeOpIter *p){ ** * OP_VUpdate ** * OP_VRename ** * OP_FkCounter with P2==0 (immediate foreign key constraint) +** * OP_CreateTable and OP_InitCoroutine (for CREATE TABLE AS SELECT ...) ** ** Then check that the value of Parse.mayAbort is true if an ** ABORT may be thrown, or false otherwise. Return true if it does @@ -65179,6 +66322,8 @@ static Op *opIterNext(VdbeOpIter *p){ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ int hasAbort = 0; int hasFkCounter = 0; + int hasCreateTable = 0; + int hasInitCoroutine = 0; Op *pOp; VdbeOpIter sIter; memset(&sIter, 0, sizeof(sIter)); @@ -65193,6 +66338,8 @@ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ hasAbort = 1; break; } + if( opcode==OP_CreateTable ) hasCreateTable = 1; + if( opcode==OP_InitCoroutine ) hasInitCoroutine = 1; #ifndef SQLITE_OMIT_FOREIGN_KEY if( opcode==OP_FkCounter && pOp->p1==0 && pOp->p2==1 ){ hasFkCounter = 1; @@ -65206,7 +66353,8 @@ SQLITE_PRIVATE int sqlite3VdbeAssertMayAbort(Vdbe *v, int mayAbort){ ** through all opcodes and hasAbort may be set incorrectly. Return ** true for this case to prevent the assert() in the callers frame ** from failing. */ - return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter ); + return ( v->db->mallocFailed || hasAbort==mayAbort || hasFkCounter + || (hasCreateTable && hasInitCoroutine) ); } #endif /* SQLITE_DEBUG - the sqlite3AssertMayAbort() function */ @@ -65237,11 +66385,6 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){ /* NOTE: Be sure to update mkopcodeh.awk when adding or removing ** cases from this switch! */ switch( opcode ){ - case OP_Function: - case OP_AggStep: { - if( pOp->p5>nMaxArgs ) nMaxArgs = pOp->p5; - break; - } case OP_Transaction: { if( pOp->p2!=0 ) p->readOnly = 0; /* fall thru */ @@ -65485,6 +66628,10 @@ static void freeP4(sqlite3 *db, int p4type, void *p4){ if( p4 ){ assert( db ); switch( p4type ){ + case P4_FUNCCTX: { + freeEphemeralFunction(db, ((sqlite3_context*)p4)->pFunc); + /* Fall through into the next case */ + } case P4_REAL: case P4_INT64: case P4_DYNAMIC: @@ -65869,6 +67016,13 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){ sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); break; } +#ifdef SQLITE_DEBUG + case P4_FUNCCTX: { + FuncDef *pDef = pOp->p4.pCtx->pFunc; + sqlite3_snprintf(nTemp, zTemp, "%s(%d)", pDef->zName, pDef->nArg); + break; + } +#endif case P4_INT64: { sqlite3_snprintf(nTemp, zTemp, "%lld", *pOp->p4.pI64); break; @@ -65989,12 +67143,11 @@ SQLITE_PRIVATE void sqlite3VdbeEnter(Vdbe *p){ /* ** Unlock all of the btrees previously locked by a call to sqlite3VdbeEnter(). */ -SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ +static SQLITE_NOINLINE void vdbeLeave(Vdbe *p){ int i; sqlite3 *db; Db *aDb; int nDb; - if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ db = p->db; aDb = db->aDb; nDb = db->nDb; @@ -66004,6 +67157,10 @@ SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ } } } +SQLITE_PRIVATE void sqlite3VdbeLeave(Vdbe *p){ + if( DbMaskAllZero(p->lockMask) ) return; /* The common case */ + vdbeLeave(p); +} #endif #if defined(VDBE_PROFILE) || defined(SQLITE_DEBUG) @@ -66176,7 +67333,7 @@ SQLITE_PRIVATE int sqlite3VdbeList( }else if( db->u1.isInterrupted ){ p->rc = SQLITE_INTERRUPT; rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(p->rc)); + sqlite3VdbeError(p, sqlite3ErrStr(p->rc)); }else{ char *zP4; Op *pOp; @@ -67079,7 +68236,7 @@ SQLITE_PRIVATE int sqlite3VdbeCheckFk(Vdbe *p, int deferred){ ){ p->rc = SQLITE_CONSTRAINT_FOREIGNKEY; p->errorAction = OE_Abort; - sqlite3SetString(&p->zErrMsg, db, "FOREIGN KEY constraint failed"); + sqlite3VdbeError(p, "FOREIGN KEY constraint failed"); return SQLITE_ERROR; } return SQLITE_OK; @@ -67700,6 +68857,13 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialType(Mem *pMem, int file_format){ return ((n*2) + 12 + ((flags&MEM_Str)!=0)); } +/* +** The sizes for serial types less than 12 +*/ +static const u8 sqlite3SmallTypeSizes[] = { + 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 +}; + /* ** Return the length of the data corresponding to the supplied serial-type. */ @@ -67707,8 +68871,7 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialTypeLen(u32 serial_type){ if( serial_type>=12 ){ return (serial_type-12)/2; }else{ - static const u8 aSize[] = { 0, 1, 2, 3, 4, 6, 8, 8, 0, 0, 0, 0 }; - return aSize[serial_type]; + return sqlite3SmallTypeSizes[serial_type]; } } @@ -67792,7 +68955,7 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialPut(u8 *buf, Mem *pMem, u32 serial_type){ }else{ v = pMem->u.i; } - len = i = sqlite3VdbeSerialTypeLen(serial_type); + len = i = sqlite3SmallTypeSizes[serial_type]; assert( i>0 ); do{ buf[--i] = (u8)(v&0xFF); @@ -68077,6 +69240,7 @@ static int vdbeRecordCompareDebug( /* mem1.u.i = 0; // not needed, here to silence compiler warning */ idx1 = getVarint32(aKey1, szHdr1); + if( szHdr1>98307 ) return SQLITE_CORRUPT; d1 = szHdr1; assert( pKeyInfo->nField+pKeyInfo->nXField>=pPKey2->nField || CORRUPT_DB ); assert( pKeyInfo->aSortOrder!=0 ); @@ -68422,7 +69586,7 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( if( pRhs->flags & MEM_Int ){ serial_type = aKey1[idx1]; testcase( serial_type==12 ); - if( serial_type>=12 ){ + if( serial_type>=10 ){ rc = +1; }else if( serial_type==0 ){ rc = -1; @@ -68448,7 +69612,11 @@ SQLITE_PRIVATE int sqlite3VdbeRecordCompareWithSkip( /* RHS is real */ else if( pRhs->flags & MEM_Real ){ serial_type = aKey1[idx1]; - if( serial_type>=12 ){ + if( serial_type>=10 ){ + /* Serial types 12 or greater are strings and blobs (greater than + ** numbers). Types 10 and 11 are currently "reserved for future + ** use", so it doesn't really matter what the results of comparing + ** them to numberic values are. */ rc = +1; }else if( serial_type==0 ){ rc = -1; @@ -68817,7 +69985,7 @@ SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){ if( unlikely(typeRowid<1 || typeRowid>9 || typeRowid==7) ){ goto idx_rowid_corruption; } - lenRowid = sqlite3VdbeSerialTypeLen(typeRowid); + lenRowid = sqlite3SmallTypeSizes[typeRowid]; testcase( (u32)m.n==szHdr+lenRowid ); if( unlikely((u32)m.n<szHdr+lenRowid) ){ goto idx_rowid_corruption; @@ -68990,6 +70158,8 @@ SQLITE_PRIVATE void sqlite3VtabImportErrmsg(Vdbe *p, sqlite3_vtab *pVtab){ ** This file contains code use to implement APIs that are part of the ** VDBE. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_OMIT_DEPRECATED /* @@ -69028,6 +70198,31 @@ static int vdbeSafetyNotNull(Vdbe *p){ } } +#ifndef SQLITE_OMIT_TRACE +/* +** Invoke the profile callback. This routine is only called if we already +** know that the profile callback is defined and needs to be invoked. +*/ +static SQLITE_NOINLINE void invokeProfileCallback(sqlite3 *db, Vdbe *p){ + sqlite3_int64 iNow; + assert( p->startTime>0 ); + assert( db->xProfile!=0 ); + assert( db->init.busy==0 ); + assert( p->zSql!=0 ); + sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); + db->xProfile(db->pProfileArg, p->zSql, (iNow - p->startTime)*1000000); + p->startTime = 0; +} +/* +** The checkProfileCallback(DB,P) macro checks to see if a profile callback +** is needed, and it invokes the callback if it is needed. +*/ +# define checkProfileCallback(DB,P) \ + if( ((P)->startTime)>0 ){ invokeProfileCallback(DB,P); } +#else +# define checkProfileCallback(DB,P) /*no-op*/ +#endif + /* ** The following routine destroys a virtual machine that is created by ** the sqlite3_compile() routine. The integer returned is an SQLITE_ @@ -69048,6 +70243,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_finalize(sqlite3_stmt *pStmt){ sqlite3 *db = v->db; if( vdbeSafety(v) ) return SQLITE_MISUSE_BKPT; sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); rc = sqlite3VdbeFinalize(v); rc = sqlite3ApiExit(db, rc); sqlite3LeaveMutexAndCloseZombie(db); @@ -69069,12 +70265,14 @@ SQLITE_API int SQLITE_STDCALL sqlite3_reset(sqlite3_stmt *pStmt){ rc = SQLITE_OK; }else{ Vdbe *v = (Vdbe*)pStmt; - sqlite3_mutex_enter(v->db->mutex); + sqlite3 *db = v->db; + sqlite3_mutex_enter(db->mutex); + checkProfileCallback(db, v); rc = sqlite3VdbeReset(v); sqlite3VdbeRewind(v); - assert( (rc & (v->db->errMask))==rc ); - rc = sqlite3ApiExit(v->db, rc); - sqlite3_mutex_leave(v->db->mutex); + assert( (rc & (db->errMask))==rc ); + rc = sqlite3ApiExit(db, rc); + sqlite3_mutex_leave(db->mutex); } return rc; } @@ -69109,7 +70307,10 @@ SQLITE_API int SQLITE_STDCALL sqlite3_clear_bindings(sqlite3_stmt *pStmt){ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_blob(sqlite3_value *pVal){ Mem *p = (Mem*)pVal; if( p->flags & (MEM_Blob|MEM_Str) ){ - sqlite3VdbeMemExpandBlob(p); + if( sqlite3VdbeMemExpandBlob(p)!=SQLITE_OK ){ + assert( p->flags==MEM_Null && p->z==0 ); + return 0; + } p->flags |= MEM_Blob; return p->n ? p->z : 0; }else{ @@ -69187,6 +70388,36 @@ SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value* pVal){ return aType[pVal->flags&MEM_AffMask]; } +/* Make a copy of an sqlite3_value object +*/ +SQLITE_API sqlite3_value *SQLITE_STDCALL sqlite3_value_dup(const sqlite3_value *pOrig){ + sqlite3_value *pNew; + if( pOrig==0 ) return 0; + pNew = sqlite3_malloc( sizeof(*pNew) ); + if( pNew==0 ) return 0; + memset(pNew, 0, sizeof(*pNew)); + memcpy(pNew, pOrig, MEMCELLSIZE); + pNew->flags &= ~MEM_Dyn; + pNew->db = 0; + if( pNew->flags&(MEM_Str|MEM_Blob) ){ + pNew->flags &= ~(MEM_Static|MEM_Dyn); + pNew->flags |= MEM_Ephem; + if( sqlite3VdbeMemMakeWriteable(pNew)!=SQLITE_OK ){ + sqlite3ValueFree(pNew); + pNew = 0; + } + } + return pNew; +} + +/* Destroy an sqlite3_value object previously obtained from +** sqlite3_value_dup(). +*/ +SQLITE_API void SQLITE_STDCALL sqlite3_value_free(sqlite3_value *pOld){ + sqlite3ValueFree(pOld); +} + + /**************************** sqlite3_result_ ******************************* ** The following routines are used by user-defined functions to specify ** the function result. @@ -69341,6 +70572,15 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context *pCtx, in assert( sqlite3_mutex_held(pCtx->pOut->db->mutex) ); sqlite3VdbeMemSetZeroBlob(pCtx->pOut, n); } +SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context *pCtx, u64 n){ + Mem *pOut = pCtx->pOut; + assert( sqlite3_mutex_held(pOut->db->mutex) ); + if( n>(u64)pOut->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + return SQLITE_TOOBIG; + } + sqlite3VdbeMemSetZeroBlob(pCtx->pOut, (int)n); + return SQLITE_OK; +} SQLITE_API void SQLITE_STDCALL sqlite3_result_error_code(sqlite3_context *pCtx, int errCode){ pCtx->isError = errCode; pCtx->fErrorOrAux = 1; @@ -69395,6 +70635,7 @@ static int doWalCallbacks(sqlite3 *db){ return rc; } + /* ** Execute the statement pStmt, either until a row of data is ready, the ** statement is completely executed or an error occurs. @@ -69463,8 +70704,10 @@ static int sqlite3Step(Vdbe *p){ ); #ifndef SQLITE_OMIT_TRACE - if( db->xProfile && !db->init.busy ){ + if( db->xProfile && !db->init.busy && p->zSql ){ sqlite3OsCurrentTimeInt64(db->pVfs, &p->startTime); + }else{ + assert( p->startTime==0 ); } #endif @@ -69488,13 +70731,8 @@ static int sqlite3Step(Vdbe *p){ } #ifndef SQLITE_OMIT_TRACE - /* Invoke the profile callback if there is one - */ - if( rc!=SQLITE_ROW && db->xProfile && !db->init.busy && p->zSql ){ - sqlite3_int64 iNow; - sqlite3OsCurrentTimeInt64(db->pVfs, &iNow); - db->xProfile(db->pProfileArg, p->zSql, (iNow - p->startTime)*1000000); - } + /* If the statement completed successfully, invoke the profile callback */ + if( rc!=SQLITE_ROW ) checkProfileCallback(db, p); #endif if( rc==SQLITE_DONE ){ @@ -70322,6 +71560,20 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt *pStmt, int i, } return rc; } +SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt *pStmt, int i, sqlite3_uint64 n){ + int rc; + Vdbe *p = (Vdbe *)pStmt; + sqlite3_mutex_enter(p->db->mutex); + if( n>(u64)p->db->aLimit[SQLITE_LIMIT_LENGTH] ){ + rc = SQLITE_TOOBIG; + }else{ + assert( (n & 0x7FFFFFFF)==n ); + rc = sqlite3_bind_zeroblob(pStmt, i, n); + } + rc = sqlite3ApiExit(p->db, rc); + sqlite3_mutex_leave(p->db->mutex); + return rc; +} /* ** Return the number of wildcards that can be potentially bound to. @@ -70571,6 +71823,8 @@ SQLITE_API void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt *pStmt ** ** The Vdbe parse-tree explainer is also found here. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_OMIT_TRACE @@ -70763,6 +72017,8 @@ SQLITE_PRIVATE char *sqlite3VdbeExpandSql( ** in this file for details. If in doubt, do not deviate from existing ** commenting and indentation practices when changing or adding code. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** Invoke this macro on memory cells just prior to changing the @@ -71013,7 +72269,7 @@ static void applyNumericAffinity(Mem *pRec, int bTryForInt){ ** SQLITE_AFF_TEXT: ** Convert pRec to a text representation. ** -** SQLITE_AFF_NONE: +** SQLITE_AFF_BLOB: ** No-op. pRec is unchanged. */ static void applyAffinity( @@ -71039,6 +72295,7 @@ static void applyAffinity( if( 0==(pRec->flags&MEM_Str) && (pRec->flags&(MEM_Real|MEM_Int)) ){ sqlite3VdbeMemStringify(pRec, enc, 1); } + pRec->flags &= ~(MEM_Real|MEM_Int); } } @@ -71413,13 +72670,9 @@ SQLITE_PRIVATE int sqlite3VdbeExec( sqlite3VdbeIOTraceSql(p); #ifndef SQLITE_OMIT_PROGRESS_CALLBACK if( db->xProgress ){ + u32 iPrior = p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; assert( 0 < db->nProgressOps ); - nProgressLimit = (unsigned)p->aCounter[SQLITE_STMTSTATUS_VM_STEP]; - if( nProgressLimit==0 ){ - nProgressLimit = db->nProgressOps; - }else{ - nProgressLimit %= (unsigned)db->nProgressOps; - } + nProgressLimit = db->nProgressOps - (iPrior % db->nProgressOps); } #endif #ifdef SQLITE_DEBUG @@ -71799,12 +73052,11 @@ case OP_Halt: { assert( zType!=0 || pOp->p4.z!=0 ); zLogFmt = "abort at %d in [%s]: %s"; if( zType && pOp->p4.z ){ - sqlite3SetString(&p->zErrMsg, db, "%s constraint failed: %s", - zType, pOp->p4.z); + sqlite3VdbeError(p, "%s constraint failed: %s", zType, pOp->p4.z); }else if( pOp->p4.z ){ - sqlite3SetString(&p->zErrMsg, db, "%s", pOp->p4.z); + sqlite3VdbeError(p, "%s", pOp->p4.z); }else{ - sqlite3SetString(&p->zErrMsg, db, "%s constraint failed", zType); + sqlite3VdbeError(p, "%s constraint failed", zType); } sqlite3_log(pOp->p1, zLogFmt, pcx, p->zSql, p->zErrMsg); } @@ -72382,10 +73634,10 @@ case OP_CollSeq: { break; } -/* Opcode: Function P1 P2 P3 P4 P5 +/* Opcode: Function0 P1 P2 P3 P4 P5 ** Synopsis: r[P3]=func(r[P2@P5]) ** -** Invoke a user function (P4 is a pointer to a Function structure that +** Invoke a user function (P4 is a pointer to a FuncDef object that ** defines the function) with P5 arguments taken from register P2 and ** successors. The result of the function is stored in register P3. ** Register P3 must not be one of the function inputs. @@ -72397,59 +73649,100 @@ case OP_CollSeq: { ** sqlite3_set_auxdata() API may be safely retained until the next ** invocation of this opcode. ** -** See also: AggStep and AggFinal +** See also: Function, AggStep, AggFinal */ -case OP_Function: { - int i; - Mem *pArg; - sqlite3_context ctx; - sqlite3_value **apVal; +/* Opcode: Function P1 P2 P3 P4 P5 +** Synopsis: r[P3]=func(r[P2@P5]) +** +** Invoke a user function (P4 is a pointer to an sqlite3_context object that +** contains a pointer to the function to be run) with P5 arguments taken +** from register P2 and successors. The result of the function is stored +** in register P3. Register P3 must not be one of the function inputs. +** +** P1 is a 32-bit bitmask indicating whether or not each argument to the +** function was determined to be constant at compile time. If the first +** argument was constant then bit 0 of P1 is set. This is used to determine +** whether meta data associated with a user function argument using the +** sqlite3_set_auxdata() API may be safely retained until the next +** invocation of this opcode. +** +** SQL functions are initially coded as OP_Function0 with P4 pointing +** to a FuncDef object. But on first evaluation, the P4 operand is +** automatically converted into an sqlite3_context object and the operation +** changed to this OP_Function opcode. In this way, the initialization of +** the sqlite3_context object occurs only once, rather than once for each +** evaluation of the function. +** +** See also: Function0, AggStep, AggFinal +*/ +case OP_Function0: { int n; + sqlite3_context *pCtx; + assert( pOp->p4type==P4_FUNCDEF ); n = pOp->p5; - apVal = p->apArg; - assert( apVal || n==0 ); assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pOut = &aMem[pOp->p3]; - memAboutToChange(p, ctx.pOut); - assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); assert( pOp->p3<pOp->p2 || pOp->p3>=pOp->p2+n ); - pArg = &aMem[pOp->p2]; - for(i=0; i<n; i++, pArg++){ - assert( memIsValid(pArg) ); - apVal[i] = pArg; - Deephemeralize(pArg); - REGISTER_TRACE(pOp->p2+i, pArg); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pOut = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_Function; + /* Fall through into OP_Function */ +} +case OP_Function: { + int i; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + pOut = &aMem[pOp->p3]; + if( pCtx->pOut != pOut ){ + pCtx->pOut = pOut; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; } - assert( pOp->p4type==P4_FUNCDEF ); - ctx.pFunc = pOp->p4.pFunc; - ctx.iOp = (int)(pOp - aOp); - ctx.pVdbe = p; - MemSetTypeFlag(ctx.pOut, MEM_Null); - ctx.fErrorOrAux = 0; + memAboutToChange(p, pCtx->pOut); +#ifdef SQLITE_DEBUG + for(i=0; i<pCtx->argc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + MemSetTypeFlag(pCtx->pOut, MEM_Null); + pCtx->fErrorOrAux = 0; db->lastRowid = lastRowid; - (*ctx.pFunc->xFunc)(&ctx, n, apVal); /* IMP: R-24505-23230 */ + (*pCtx->pFunc->xFunc)(pCtx, pCtx->argc, pCtx->argv); /* IMP: R-24505-23230 */ lastRowid = db->lastRowid; /* Remember rowid changes made by xFunc */ /* If the function returned an error, throw an exception */ - if( ctx.fErrorOrAux ){ - if( ctx.isError ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(ctx.pOut)); - rc = ctx.isError; + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(pCtx->pOut)); + rc = pCtx->isError; } - sqlite3VdbeDeleteAuxData(p, (int)(pOp - aOp), pOp->p1); + sqlite3VdbeDeleteAuxData(p, pCtx->iOp, pOp->p1); } /* Copy the result of the function into register P3 */ - sqlite3VdbeChangeEncoding(ctx.pOut, encoding); - if( sqlite3VdbeMemTooBig(ctx.pOut) ){ - goto too_big; + if( pOut->flags & (MEM_Str|MEM_Blob) ){ + sqlite3VdbeChangeEncoding(pCtx->pOut, encoding); + if( sqlite3VdbeMemTooBig(pCtx->pOut) ) goto too_big; } - REGISTER_TRACE(pOp->p3, ctx.pOut); - UPDATE_MAX_BLOBSIZE(ctx.pOut); + REGISTER_TRACE(pOp->p3, pCtx->pOut); + UPDATE_MAX_BLOBSIZE(pCtx->pOut); break; } @@ -72612,9 +73905,9 @@ case OP_RealAffinity: { /* in1 */ ** A NULL value is not changed by this routine. It remains NULL. */ case OP_Cast: { /* in1 */ - assert( pOp->p2>=SQLITE_AFF_NONE && pOp->p2<=SQLITE_AFF_REAL ); + assert( pOp->p2>=SQLITE_AFF_BLOB && pOp->p2<=SQLITE_AFF_REAL ); testcase( pOp->p2==SQLITE_AFF_TEXT ); - testcase( pOp->p2==SQLITE_AFF_NONE ); + testcase( pOp->p2==SQLITE_AFF_BLOB ); testcase( pOp->p2==SQLITE_AFF_NUMERIC ); testcase( pOp->p2==SQLITE_AFF_INTEGER ); testcase( pOp->p2==SQLITE_AFF_REAL ); @@ -73425,7 +74718,7 @@ case OP_Affinity: { ** The mapping from character to affinity is given by the SQLITE_AFF_ ** macros defined in sqliteInt.h. ** -** If P4 is NULL then all index fields have the affinity NONE. +** If P4 is NULL then all index fields have the affinity BLOB. */ case OP_MakeRecord: { u8 *zNewRecord; /* A buffer to hold the data for the new record */ @@ -73497,7 +74790,7 @@ case OP_MakeRecord: { len = sqlite3VdbeSerialTypeLen(serial_type); if( pRec->flags & MEM_Zero ){ if( nData ){ - sqlite3VdbeMemExpandBlob(pRec); + if( sqlite3VdbeMemExpandBlob(pRec) ) goto no_mem; }else{ nZero += pRec->u.nZero; len -= pRec->u.nZero; @@ -73623,8 +74916,7 @@ case OP_Savepoint: { /* A new savepoint cannot be created if there are active write ** statements (i.e. open read/write incremental blob handles). */ - sqlite3SetString(&p->zErrMsg, db, "cannot open savepoint - " - "SQL statements in progress"); + sqlite3VdbeError(p, "cannot open savepoint - SQL statements in progress"); rc = SQLITE_BUSY; }else{ nName = sqlite3Strlen30(zName); @@ -73675,15 +74967,14 @@ case OP_Savepoint: { iSavepoint++; } if( !pSavepoint ){ - sqlite3SetString(&p->zErrMsg, db, "no such savepoint: %s", zName); + sqlite3VdbeError(p, "no such savepoint: %s", zName); rc = SQLITE_ERROR; }else if( db->nVdbeWrite>0 && p1==SAVEPOINT_RELEASE ){ /* It is not possible to release (commit) a savepoint if there are ** active write statements. */ - sqlite3SetString(&p->zErrMsg, db, - "cannot release savepoint - SQL statements in progress" - ); + sqlite3VdbeError(p, "cannot release savepoint - " + "SQL statements in progress"); rc = SQLITE_BUSY; }else{ @@ -73789,23 +75080,12 @@ case OP_AutoCommit: { assert( db->nVdbeActive>0 ); /* At least this one VM is active */ assert( p->bIsReader ); -#if 0 - if( turnOnAC && iRollback && db->nVdbeActive>1 ){ - /* If this instruction implements a ROLLBACK and other VMs are - ** still running, and a transaction is active, return an error indicating - ** that the other VMs must complete first. - */ - sqlite3SetString(&p->zErrMsg, db, "cannot rollback transaction - " - "SQL statements in progress"); - rc = SQLITE_BUSY; - }else -#endif if( turnOnAC && !iRollback && db->nVdbeWrite>0 ){ /* If this instruction implements a COMMIT and other VMs are writing ** return an error indicating that the other VMs must complete first. */ - sqlite3SetString(&p->zErrMsg, db, "cannot commit transaction - " - "SQL statements in progress"); + sqlite3VdbeError(p, "cannot commit transaction - " + "SQL statements in progress"); rc = SQLITE_BUSY; }else if( desiredAutoCommit!=db->autoCommit ){ if( iRollback ){ @@ -73832,7 +75112,7 @@ case OP_AutoCommit: { } goto vdbe_return; }else{ - sqlite3SetString(&p->zErrMsg, db, + sqlite3VdbeError(p, (!desiredAutoCommit)?"cannot start a transaction within a transaction":( (iRollback)?"cannot rollback - no transaction is active": "cannot commit - no transaction is active")); @@ -74356,6 +75636,26 @@ case OP_Close: { break; } +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK +/* Opcode: ColumnsUsed P1 * * P4 * +** +** This opcode (which only exists if SQLite was compiled with +** SQLITE_ENABLE_COLUMN_USED_MASK) identifies which columns of the +** table or index for cursor P1 are used. P4 is a 64-bit integer +** (P4_INT64) in which the first 63 bits are one for each of the +** first 63 columns of the table or index that are actually used +** by the cursor. The high-order bit is set if any column after +** the 64th is used. +*/ +case OP_ColumnsUsed: { + VdbeCursor *pC; + pC = p->apCsr[pOp->p1]; + assert( pC->pCursor ); + pC->maskUsed = *(u64*)pOp->p4.pI64; + break; +} +#endif + /* Opcode: SeekGE P1 P2 P3 P4 * ** Synopsis: key=r[P3@P4] ** @@ -74844,9 +76144,8 @@ case OP_NewRowid: { /* out2 */ assert( pOp->p1>=0 && pOp->p1<p->nCursor ); pC = p->apCsr[pOp->p1]; assert( pC!=0 ); - if( NEVER(pC->pCursor==0) ){ - /* The zero initialization above is all that is needed */ - }else{ + assert( pC->pCursor!=0 ); + { /* The next rowid or record number (different terms for the same ** thing) is obtained in a two-step algorithm. ** @@ -75585,7 +76884,6 @@ next_tail: case OP_SorterInsert: /* in2 */ case OP_IdxInsert: { /* in2 */ VdbeCursor *pC; - BtCursor *pCrsr; int nKey; const char *zKey; @@ -75595,18 +76893,17 @@ case OP_IdxInsert: { /* in2 */ assert( isSorter(pC)==(pOp->opcode==OP_SorterInsert) ); pIn2 = &aMem[pOp->p2]; assert( pIn2->flags & MEM_Blob ); - pCrsr = pC->pCursor; if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++; - assert( pCrsr!=0 ); + assert( pC->pCursor!=0 ); assert( pC->isTable==0 ); rc = ExpandBlob(pIn2); if( rc==SQLITE_OK ){ - if( isSorter(pC) ){ + if( pOp->opcode==OP_SorterInsert ){ rc = sqlite3VdbeSorterWrite(pC, pIn2); }else{ nKey = pIn2->n; zKey = pIn2->z; - rc = sqlite3BtreeInsert(pCrsr, zKey, nKey, "", 0, 0, pOp->p3, + rc = sqlite3BtreeInsert(pC->pCursor, zKey, nKey, "", 0, 0, pOp->p3, ((pOp->p5 & OPFLAG_USESEEKRESULT) ? pC->seekResult : 0) ); assert( pC->deferredMoveto==0 ); @@ -76265,7 +77562,7 @@ case OP_Program: { /* jump */ if( p->nFrame>=db->aLimit[SQLITE_LIMIT_TRIGGER_DEPTH] ){ rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, "too many levels of trigger recursion"); + sqlite3VdbeError(p, "too many levels of trigger recursion"); break; } @@ -76526,57 +77823,101 @@ case OP_JumpZeroIncr: { /* jump, in1 */ break; } -/* Opcode: AggStep * P2 P3 P4 P5 +/* Opcode: AggStep0 * P2 P3 P4 P5 ** Synopsis: accum=r[P3] step(r[P2@P5]) ** ** Execute the step function for an aggregate. The ** function has P5 arguments. P4 is a pointer to the FuncDef -** structure that specifies the function. Use register -** P3 as the accumulator. +** structure that specifies the function. Register P3 is the +** accumulator. ** ** The P5 arguments are taken from register P2 and its ** successors. */ -case OP_AggStep: { +/* Opcode: AggStep * P2 P3 P4 P5 +** Synopsis: accum=r[P3] step(r[P2@P5]) +** +** Execute the step function for an aggregate. The +** function has P5 arguments. P4 is a pointer to an sqlite3_context +** object that is used to run the function. Register P3 is +** as the accumulator. +** +** The P5 arguments are taken from register P2 and its +** successors. +** +** This opcode is initially coded as OP_AggStep0. On first evaluation, +** the FuncDef stored in P4 is converted into an sqlite3_context and +** the opcode is changed. In this way, the initialization of the +** sqlite3_context only happens once, instead of on each call to the +** step function. +*/ +case OP_AggStep0: { int n; + sqlite3_context *pCtx; + + assert( pOp->p4type==P4_FUNCDEF ); + n = pOp->p5; + assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); + assert( n==0 || (pOp->p2>0 && pOp->p2+n<=(p->nMem-p->nCursor)+1) ); + assert( pOp->p3<pOp->p2 || pOp->p3>=pOp->p2+n ); + pCtx = sqlite3DbMallocRaw(db, sizeof(*pCtx) + (n-1)*sizeof(sqlite3_value*)); + if( pCtx==0 ) goto no_mem; + pCtx->pMem = 0; + pCtx->pFunc = pOp->p4.pFunc; + pCtx->iOp = (int)(pOp - aOp); + pCtx->pVdbe = p; + pCtx->argc = n; + pOp->p4type = P4_FUNCCTX; + pOp->p4.pCtx = pCtx; + pOp->opcode = OP_AggStep; + /* Fall through into OP_AggStep */ +} +case OP_AggStep: { int i; + sqlite3_context *pCtx; Mem *pMem; - Mem *pRec; Mem t; - sqlite3_context ctx; - sqlite3_value **apVal; - n = pOp->p5; - assert( n>=0 ); - pRec = &aMem[pOp->p2]; - apVal = p->apArg; - assert( apVal || n==0 ); - for(i=0; i<n; i++, pRec++){ - assert( memIsValid(pRec) ); - apVal[i] = pRec; - memAboutToChange(p, pRec); + assert( pOp->p4type==P4_FUNCCTX ); + pCtx = pOp->p4.pCtx; + pMem = &aMem[pOp->p3]; + + /* If this function is inside of a trigger, the register array in aMem[] + ** might change from one evaluation to the next. The next block of code + ** checks to see if the register array has changed, and if so it + ** reinitializes the relavant parts of the sqlite3_context object */ + if( pCtx->pMem != pMem ){ + pCtx->pMem = pMem; + for(i=pCtx->argc-1; i>=0; i--) pCtx->argv[i] = &aMem[pOp->p2+i]; } - ctx.pFunc = pOp->p4.pFunc; - assert( pOp->p3>0 && pOp->p3<=(p->nMem-p->nCursor) ); - ctx.pMem = pMem = &aMem[pOp->p3]; + +#ifdef SQLITE_DEBUG + for(i=0; i<pCtx->argc; i++){ + assert( memIsValid(pCtx->argv[i]) ); + REGISTER_TRACE(pOp->p2+i, pCtx->argv[i]); + } +#endif + pMem->n++; sqlite3VdbeMemInit(&t, db, MEM_Null); - ctx.pOut = &t; - ctx.isError = 0; - ctx.pVdbe = p; - ctx.iOp = (int)(pOp - aOp); - ctx.skipFlag = 0; - (ctx.pFunc->xStep)(&ctx, n, apVal); /* IMP: R-24505-23230 */ - if( ctx.isError ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&t)); - rc = ctx.isError; + pCtx->pOut = &t; + pCtx->fErrorOrAux = 0; + pCtx->skipFlag = 0; + (pCtx->pFunc->xStep)(pCtx,pCtx->argc,pCtx->argv); /* IMP: R-24505-23230 */ + if( pCtx->fErrorOrAux ){ + if( pCtx->isError ){ + sqlite3VdbeError(p, "%s", sqlite3_value_text(&t)); + rc = pCtx->isError; + } + sqlite3VdbeMemRelease(&t); + }else{ + assert( t.flags==MEM_Null ); } - if( ctx.skipFlag ){ + if( pCtx->skipFlag ){ assert( pOp[-1].opcode==OP_CollSeq ); i = pOp[-1].p1; if( i ) sqlite3VdbeMemSetInt64(&aMem[i], 1); } - sqlite3VdbeMemRelease(&t); break; } @@ -76600,7 +77941,7 @@ case OP_AggFinal: { assert( (pMem->flags & ~(MEM_Null|MEM_Agg))==0 ); rc = sqlite3VdbeMemFinalize(pMem, pOp->p4.pFunc); if( rc ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(pMem)); + sqlite3VdbeError(p, "%s", sqlite3_value_text(pMem)); } sqlite3VdbeChangeEncoding(pMem, encoding); UPDATE_MAX_BLOBSIZE(pMem); @@ -76705,7 +78046,7 @@ case OP_JournalMode: { /* out2 */ ){ if( !db->autoCommit || db->nVdbeRead>1 ){ rc = SQLITE_ERROR; - sqlite3SetString(&p->zErrMsg, db, + sqlite3VdbeError(p, "cannot change %s wal mode from within a transaction", (eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of") ); @@ -76836,7 +78177,7 @@ case OP_TableLock: { rc = sqlite3BtreeLockTable(db->aDb[p1].pBt, pOp->p2, isWriteLock); if( (rc&0xFF)==SQLITE_LOCKED ){ const char *z = pOp->p4.z; - sqlite3SetString(&p->zErrMsg, db, "database table is locked: %s", z); + sqlite3VdbeError(p, "database table is locked: %s", z); } } break; @@ -77384,7 +78725,7 @@ vdbe_return: ** is encountered. */ too_big: - sqlite3SetString(&p->zErrMsg, db, "string or blob too big"); + sqlite3VdbeError(p, "string or blob too big"); rc = SQLITE_TOOBIG; goto vdbe_error_halt; @@ -77392,7 +78733,7 @@ too_big: */ no_mem: db->mallocFailed = 1; - sqlite3SetString(&p->zErrMsg, db, "out of memory"); + sqlite3VdbeError(p, "out of memory"); rc = SQLITE_NOMEM; goto vdbe_error_halt; @@ -77403,7 +78744,7 @@ abort_due_to_error: assert( p->zErrMsg==0 ); if( db->mallocFailed ) rc = SQLITE_NOMEM; if( rc!=SQLITE_IOERR_NOMEM ){ - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3VdbeError(p, "%s", sqlite3ErrStr(rc)); } goto vdbe_error_halt; @@ -77414,7 +78755,7 @@ abort_due_to_interrupt: assert( db->u1.isInterrupted ); rc = SQLITE_INTERRUPT; p->rc = rc; - sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3VdbeError(p, "%s", sqlite3ErrStr(rc)); goto vdbe_error_halt; } @@ -77436,6 +78777,8 @@ abort_due_to_interrupt: ** This file contains code used to implement incremental BLOB I/O. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #ifndef SQLITE_OMIT_INCRBLOB @@ -78034,6 +79377,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_i ** thread to merge the output of each of the others to a single PMA for ** the main thread to read from. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ /* ** If SQLITE_DEBUG_SORTER_THREADS is defined, this module outputs various @@ -80650,6 +81995,7 @@ SQLITE_PRIVATE int sqlite3VdbeSorterCompare( ** 2) The sqlite3JournalCreate() function is called. */ #ifdef SQLITE_ENABLE_ATOMIC_WRITE +/* #include "sqliteInt.h" */ /* @@ -80897,6 +82243,7 @@ SQLITE_PRIVATE int sqlite3JournalSize(sqlite3_vfs *pVfs){ ** The in-memory rollback journal is used to journal transactions for ** ":memory:" databases and when the journal_mode=MEMORY pragma is used. */ +/* #include "sqliteInt.h" */ /* Forward references to internal structures */ typedef struct MemJournal MemJournal; @@ -81152,6 +82499,7 @@ SQLITE_PRIVATE int sqlite3MemJournalSize(void){ ** This file contains routines used for walking the parser tree for ** an SQL statement. */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* #include <string.h> */ @@ -81310,6 +82658,7 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){ ** resolve all identifiers by associating them with a particular ** table and column. */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* #include <string.h> */ @@ -81653,7 +83002,7 @@ static int lookupName( break; } } - if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && HasRowid(pTab) ){ + if( iCol>=pTab->nCol && sqlite3IsRowid(zCol) && VisibleRowid(pTab) ){ /* IMP: R-51414-32910 */ /* IMP: R-44911-55124 */ iCol = -1; @@ -81683,7 +83032,7 @@ static int lookupName( ** Perhaps the name is a reference to the ROWID */ if( cnt==0 && cntTab==1 && pMatch && sqlite3IsRowid(zCol) - && HasRowid(pMatch->pTab) ){ + && VisibleRowid(pMatch->pTab) ){ cnt = 1; pExpr->iColumn = -1; /* IMP: R-44911-55124 */ pExpr->affinity = SQLITE_AFF_INTEGER; @@ -82626,6 +83975,13 @@ static int resolveSelectStep(Walker *pWalker, Select *p){ } } + /* If this is part of a compound SELECT, check that it has the right + ** number of expressions in the select list. */ + if( p->pNext && p->pEList->nExpr!=p->pNext->pEList->nExpr ){ + sqlite3SelectWrongNumTermsError(pParse, p->pNext); + return WRC_Abort; + } + /* Advance to the next term of the compound */ p = p->pPrior; @@ -82815,6 +84171,7 @@ SQLITE_PRIVATE void sqlite3ResolveSelfReference( ** This file contains routines used for analyzing expressions and ** for generating VDBE code that evaluates expressions in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Return the 'affinity' of the expression pExpr if any. @@ -82993,13 +84350,13 @@ SQLITE_PRIVATE char sqlite3CompareAffinity(Expr *pExpr, char aff2){ if( sqlite3IsNumericAffinity(aff1) || sqlite3IsNumericAffinity(aff2) ){ return SQLITE_AFF_NUMERIC; }else{ - return SQLITE_AFF_NONE; + return SQLITE_AFF_BLOB; } }else if( !aff1 && !aff2 ){ /* Neither side of the comparison is a column. Compare the ** results directly. */ - return SQLITE_AFF_NONE; + return SQLITE_AFF_BLOB; }else{ /* One side is a column, the other is not. Use the columns affinity. */ assert( aff1==0 || aff2==0 ); @@ -83023,7 +84380,7 @@ static char comparisonAffinity(Expr *pExpr){ }else if( ExprHasProperty(pExpr, EP_xIsSelect) ){ aff = sqlite3CompareAffinity(pExpr->x.pSelect->pEList->a[0].pExpr, aff); }else if( !aff ){ - aff = SQLITE_AFF_NONE; + aff = SQLITE_AFF_BLOB; } return aff; } @@ -83037,7 +84394,7 @@ static char comparisonAffinity(Expr *pExpr){ SQLITE_PRIVATE int sqlite3IndexAffinityOk(Expr *pExpr, char idx_affinity){ char aff = comparisonAffinity(pExpr); switch( aff ){ - case SQLITE_AFF_NONE: + case SQLITE_AFF_BLOB: return 1; case SQLITE_AFF_TEXT: return idx_affinity==SQLITE_AFF_TEXT; @@ -83843,7 +85200,7 @@ SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3 *db, SrcList *p, int flags){ pNewItem->isCorrelated = pOldItem->isCorrelated; pNewItem->viaCoroutine = pOldItem->viaCoroutine; pNewItem->isRecursive = pOldItem->isRecursive; - pNewItem->zIndex = sqlite3DbStrDup(db, pOldItem->zIndex); + pNewItem->zIndexedBy = sqlite3DbStrDup(db, pOldItem->zIndexedBy); pNewItem->notIndexed = pOldItem->notIndexed; pNewItem->pIndex = pOldItem->pIndex; pTab = pNewItem->pTab = pOldItem->pTab; @@ -84070,7 +85427,7 @@ SQLITE_PRIVATE u32 sqlite3ExprListFlags(const ExprList *pList){ ** ** sqlite3ExprIsConstant() pWalker->eCode==1 ** sqlite3ExprIsConstantNotJoin() pWalker->eCode==2 -** sqlite3ExprRefOneTableOnly() pWalker->eCode==3 +** sqlite3ExprIsTableConstant() pWalker->eCode==3 ** sqlite3ExprIsConstantOrFunction() pWalker->eCode==4 or 5 ** ** In all cases, the callbacks set Walker.eCode=0 and abort if the expression @@ -84178,7 +85535,7 @@ SQLITE_PRIVATE int sqlite3ExprIsConstantNotJoin(Expr *p){ } /* -** Walk an expression tree. Return non-zero if the expression constant +** Walk an expression tree. Return non-zero if the expression is constant ** for any single row of the table with cursor iCur. In other words, the ** expression must not refer to any non-deterministic function nor any ** table other than iCur. @@ -84284,7 +85641,7 @@ SQLITE_PRIVATE int sqlite3ExprCanBeNull(const Expr *p){ */ SQLITE_PRIVATE int sqlite3ExprNeedsNoAffinityChange(const Expr *p, char aff){ u8 op; - if( aff==SQLITE_AFF_NONE ) return 1; + if( aff==SQLITE_AFF_BLOB ) return 1; while( p->op==TK_UPLUS || p->op==TK_UMINUS ){ p = p->pLeft; } op = p->op; if( op==TK_REGISTER ) op = p->op2; @@ -84735,7 +86092,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect( int r1, r2, r3; if( !affinity ){ - affinity = SQLITE_AFF_NONE; + affinity = SQLITE_AFF_BLOB; } if( pKeyInfo ){ assert( sqlite3KeyInfoIsWriteable(pKeyInfo) ); @@ -85010,17 +86367,6 @@ static void sqlite3ExprCodeIN( } #endif /* SQLITE_OMIT_SUBQUERY */ -/* -** Duplicate an 8-byte value -*/ -static char *dup8bytes(Vdbe *v, const char *in){ - char *out = sqlite3DbMallocRaw(sqlite3VdbeDb(v), 8); - if( out ){ - memcpy(out, in, 8); - } - return out; -} - #ifndef SQLITE_OMIT_FLOATING_POINT /* ** Generate an instruction that will put the floating point @@ -85033,12 +86379,10 @@ static char *dup8bytes(Vdbe *v, const char *in){ static void codeReal(Vdbe *v, const char *z, int negateFlag, int iMem){ if( ALWAYS(z!=0) ){ double value; - char *zV; sqlite3AtoF(z, &value, sqlite3Strlen30(z), SQLITE_UTF8); assert( !sqlite3IsNaN(value) ); /* The new AtoF never returns NaN */ if( negateFlag ) value = -value; - zV = dup8bytes(v, (char*)&value); - sqlite3VdbeAddOp4(v, OP_Real, 0, iMem, 0, zV, P4_REAL); + sqlite3VdbeAddOp4Dup8(v, OP_Real, 0, iMem, 0, (u8*)&value, P4_REAL); } } #endif @@ -85064,10 +86408,8 @@ static void codeInteger(Parse *pParse, Expr *pExpr, int negFlag, int iMem){ assert( z!=0 ); c = sqlite3DecOrHexToI64(z, &value); if( c==0 || (c==2 && negFlag) ){ - char *zV; if( negFlag ){ value = c==2 ? SMALLEST_INT64 : -value; } - zV = dup8bytes(v, (char*)&value); - sqlite3VdbeAddOp4(v, OP_Int64, 0, iMem, 0, zV, P4_INT64); + sqlite3VdbeAddOp4Dup8(v, OP_Int64, 0, iMem, 0, (u8*)&value, P4_INT64); }else{ #ifdef SQLITE_OMIT_FLOATING_POINT sqlite3ErrorMsg(pParse, "oversized integer: %s%s", negFlag ? "-" : "", z); @@ -85672,7 +87014,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) */ if( pDef->funcFlags & SQLITE_FUNC_UNLIKELY ){ assert( nFarg>=1 ); - sqlite3ExprCode(pParse, pFarg->a[0].pExpr, target); + inReg = sqlite3ExprCodeTarget(pParse, pFarg->a[0].pExpr, target); break; } @@ -85742,7 +87084,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target) if( !pColl ) pColl = db->pDfltColl; sqlite3VdbeAddOp4(v, OP_CollSeq, 0, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_Function, constMask, r1, target, + sqlite3VdbeAddOp4(v, OP_Function0, constMask, r1, target, (char*)pDef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nFarg); if( nFarg && constMask==0 ){ @@ -86113,268 +87455,6 @@ SQLITE_PRIVATE void sqlite3ExprCodeAndCache(Parse *pParse, Expr *pExpr, int targ exprToRegister(pExpr, iMem); } -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable explanation of an expression tree. -*/ -SQLITE_PRIVATE void sqlite3TreeViewExpr(TreeView *pView, const Expr *pExpr, u8 moreToFollow){ - const char *zBinOp = 0; /* Binary operator */ - const char *zUniOp = 0; /* Unary operator */ - pView = sqlite3TreeViewPush(pView, moreToFollow); - if( pExpr==0 ){ - sqlite3TreeViewLine(pView, "nil"); - sqlite3TreeViewPop(pView); - return; - } - switch( pExpr->op ){ - case TK_AGG_COLUMN: { - sqlite3TreeViewLine(pView, "AGG{%d:%d}", - pExpr->iTable, pExpr->iColumn); - break; - } - case TK_COLUMN: { - if( pExpr->iTable<0 ){ - /* This only happens when coding check constraints */ - sqlite3TreeViewLine(pView, "COLUMN(%d)", pExpr->iColumn); - }else{ - sqlite3TreeViewLine(pView, "{%d:%d}", - pExpr->iTable, pExpr->iColumn); - } - break; - } - case TK_INTEGER: { - if( pExpr->flags & EP_IntValue ){ - sqlite3TreeViewLine(pView, "%d", pExpr->u.iValue); - }else{ - sqlite3TreeViewLine(pView, "%s", pExpr->u.zToken); - } - break; - } -#ifndef SQLITE_OMIT_FLOATING_POINT - case TK_FLOAT: { - sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); - break; - } -#endif - case TK_STRING: { - sqlite3TreeViewLine(pView,"%Q", pExpr->u.zToken); - break; - } - case TK_NULL: { - sqlite3TreeViewLine(pView,"NULL"); - break; - } -#ifndef SQLITE_OMIT_BLOB_LITERAL - case TK_BLOB: { - sqlite3TreeViewLine(pView,"%s", pExpr->u.zToken); - break; - } -#endif - case TK_VARIABLE: { - sqlite3TreeViewLine(pView,"VARIABLE(%s,%d)", - pExpr->u.zToken, pExpr->iColumn); - break; - } - case TK_REGISTER: { - sqlite3TreeViewLine(pView,"REGISTER(%d)", pExpr->iTable); - break; - } - case TK_AS: { - sqlite3TreeViewLine(pView,"AS %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } - case TK_ID: { - sqlite3TreeViewLine(pView,"ID \"%w\"", pExpr->u.zToken); - break; - } -#ifndef SQLITE_OMIT_CAST - case TK_CAST: { - /* Expressions of the form: CAST(pLeft AS token) */ - sqlite3TreeViewLine(pView,"CAST %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } -#endif /* SQLITE_OMIT_CAST */ - case TK_LT: zBinOp = "LT"; break; - case TK_LE: zBinOp = "LE"; break; - case TK_GT: zBinOp = "GT"; break; - case TK_GE: zBinOp = "GE"; break; - case TK_NE: zBinOp = "NE"; break; - case TK_EQ: zBinOp = "EQ"; break; - case TK_IS: zBinOp = "IS"; break; - case TK_ISNOT: zBinOp = "ISNOT"; break; - case TK_AND: zBinOp = "AND"; break; - case TK_OR: zBinOp = "OR"; break; - case TK_PLUS: zBinOp = "ADD"; break; - case TK_STAR: zBinOp = "MUL"; break; - case TK_MINUS: zBinOp = "SUB"; break; - case TK_REM: zBinOp = "REM"; break; - case TK_BITAND: zBinOp = "BITAND"; break; - case TK_BITOR: zBinOp = "BITOR"; break; - case TK_SLASH: zBinOp = "DIV"; break; - case TK_LSHIFT: zBinOp = "LSHIFT"; break; - case TK_RSHIFT: zBinOp = "RSHIFT"; break; - case TK_CONCAT: zBinOp = "CONCAT"; break; - case TK_DOT: zBinOp = "DOT"; break; - - case TK_UMINUS: zUniOp = "UMINUS"; break; - case TK_UPLUS: zUniOp = "UPLUS"; break; - case TK_BITNOT: zUniOp = "BITNOT"; break; - case TK_NOT: zUniOp = "NOT"; break; - case TK_ISNULL: zUniOp = "ISNULL"; break; - case TK_NOTNULL: zUniOp = "NOTNULL"; break; - - case TK_COLLATE: { - sqlite3TreeViewLine(pView, "COLLATE %Q", pExpr->u.zToken); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - break; - } - - case TK_AGG_FUNCTION: - case TK_FUNCTION: { - ExprList *pFarg; /* List of function arguments */ - if( ExprHasProperty(pExpr, EP_TokenOnly) ){ - pFarg = 0; - }else{ - pFarg = pExpr->x.pList; - } - if( pExpr->op==TK_AGG_FUNCTION ){ - sqlite3TreeViewLine(pView, "AGG_FUNCTION%d %Q", - pExpr->op2, pExpr->u.zToken); - }else{ - sqlite3TreeViewLine(pView, "FUNCTION %Q", pExpr->u.zToken); - } - if( pFarg ){ - sqlite3TreeViewExprList(pView, pFarg, 0, 0); - } - break; - } -#ifndef SQLITE_OMIT_SUBQUERY - case TK_EXISTS: { - sqlite3TreeViewLine(pView, "EXISTS-expr"); - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - break; - } - case TK_SELECT: { - sqlite3TreeViewLine(pView, "SELECT-expr"); - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - break; - } - case TK_IN: { - sqlite3TreeViewLine(pView, "IN"); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - if( ExprHasProperty(pExpr, EP_xIsSelect) ){ - sqlite3TreeViewSelect(pView, pExpr->x.pSelect, 0); - }else{ - sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); - } - break; - } -#endif /* SQLITE_OMIT_SUBQUERY */ - - /* - ** x BETWEEN y AND z - ** - ** This is equivalent to - ** - ** x>=y AND x<=z - ** - ** X is stored in pExpr->pLeft. - ** Y is stored in pExpr->pList->a[0].pExpr. - ** Z is stored in pExpr->pList->a[1].pExpr. - */ - case TK_BETWEEN: { - Expr *pX = pExpr->pLeft; - Expr *pY = pExpr->x.pList->a[0].pExpr; - Expr *pZ = pExpr->x.pList->a[1].pExpr; - sqlite3TreeViewLine(pView, "BETWEEN"); - sqlite3TreeViewExpr(pView, pX, 1); - sqlite3TreeViewExpr(pView, pY, 1); - sqlite3TreeViewExpr(pView, pZ, 0); - break; - } - case TK_TRIGGER: { - /* If the opcode is TK_TRIGGER, then the expression is a reference - ** to a column in the new.* or old.* pseudo-tables available to - ** trigger programs. In this case Expr.iTable is set to 1 for the - ** new.* pseudo-table, or 0 for the old.* pseudo-table. Expr.iColumn - ** is set to the column of the pseudo-table to read, or to -1 to - ** read the rowid field. - */ - sqlite3TreeViewLine(pView, "%s(%d)", - pExpr->iTable ? "NEW" : "OLD", pExpr->iColumn); - break; - } - case TK_CASE: { - sqlite3TreeViewLine(pView, "CASE"); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - sqlite3TreeViewExprList(pView, pExpr->x.pList, 0, 0); - break; - } -#ifndef SQLITE_OMIT_TRIGGER - case TK_RAISE: { - const char *zType = "unk"; - switch( pExpr->affinity ){ - case OE_Rollback: zType = "rollback"; break; - case OE_Abort: zType = "abort"; break; - case OE_Fail: zType = "fail"; break; - case OE_Ignore: zType = "ignore"; break; - } - sqlite3TreeViewLine(pView, "RAISE %s(%Q)", zType, pExpr->u.zToken); - break; - } -#endif - default: { - sqlite3TreeViewLine(pView, "op=%d", pExpr->op); - break; - } - } - if( zBinOp ){ - sqlite3TreeViewLine(pView, "%s", zBinOp); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 1); - sqlite3TreeViewExpr(pView, pExpr->pRight, 0); - }else if( zUniOp ){ - sqlite3TreeViewLine(pView, "%s", zUniOp); - sqlite3TreeViewExpr(pView, pExpr->pLeft, 0); - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable explanation of an expression list. -*/ -SQLITE_PRIVATE void sqlite3TreeViewExprList( - TreeView *pView, - const ExprList *pList, - u8 moreToFollow, - const char *zLabel -){ - int i; - pView = sqlite3TreeViewPush(pView, moreToFollow); - if( zLabel==0 || zLabel[0]==0 ) zLabel = "LIST"; - if( pList==0 ){ - sqlite3TreeViewLine(pView, "%s (empty)", zLabel); - }else{ - sqlite3TreeViewLine(pView, "%s", zLabel); - for(i=0; i<pList->nExpr; i++){ - sqlite3TreeViewExpr(pView, pList->a[i].pExpr, i<pList->nExpr-1); -#if 0 - if( pList->a[i].zName ){ - sqlite3ExplainPrintf(pOut, " AS %s", pList->a[i].zName); - } - if( pList->a[i].bSpanIsTab ){ - sqlite3ExplainPrintf(pOut, " (%s)", pList->a[i].zSpan); - } -#endif - } - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - /* ** Generate code that pushes the value of every element of the given ** expression list into a sequence of registers beginning at target. @@ -86766,6 +87846,21 @@ SQLITE_PRIVATE void sqlite3ExprIfFalse(Parse *pParse, Expr *pExpr, int dest, int sqlite3ReleaseTempReg(pParse, regFree2); } +/* +** Like sqlite3ExprIfFalse() except that a copy is made of pExpr before +** code generation, and that copy is deleted after code generation. This +** ensures that the original pExpr is unchanged. +*/ +SQLITE_PRIVATE void sqlite3ExprIfFalseDup(Parse *pParse, Expr *pExpr, int dest,int jumpIfNull){ + sqlite3 *db = pParse->db; + Expr *pCopy = sqlite3ExprDup(db, pExpr, 0); + if( db->mallocFailed==0 ){ + sqlite3ExprIfFalse(pParse, pCopy, dest, jumpIfNull); + } + sqlite3ExprDelete(db, pCopy); +} + + /* ** Do a deep comparison of two expression trees. Return 0 if the two ** expressions are completely identical. Return 1 if they differ only @@ -87239,6 +88334,7 @@ SQLITE_PRIVATE void sqlite3ClearTempRegCache(Parse *pParse){ ** This file contains C code routines that used to generate VDBE code ** that implements the ALTER TABLE command. */ +/* #include "sqliteInt.h" */ /* ** The code in this file only exists if we are not omitting the @@ -87918,7 +89014,7 @@ SQLITE_PRIVATE void sqlite3AlterFinishAddColumn(Parse *pParse, Token *pColDef){ if( pDflt ){ sqlite3_value *pVal = 0; int rc; - rc = sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_NONE, &pVal); + rc = sqlite3ValueFromExpr(db, pDflt, SQLITE_UTF8, SQLITE_AFF_BLOB, &pVal); assert( rc==SQLITE_OK || rc==SQLITE_NOMEM ); if( rc!=SQLITE_OK ){ db->mallocFailed = 1; @@ -88201,6 +89297,7 @@ exit_begin_add_column: ** integer in the equivalent columns in sqlite_stat4. */ #ifndef SQLITE_OMIT_ANALYZE +/* #include "sqliteInt.h" */ #if defined(SQLITE_ENABLE_STAT4) # define IsStat4 1 @@ -89003,7 +90100,7 @@ static void callStatGet(Vdbe *v, int regStat4, int iParam, int regOut){ #else UNUSED_PARAMETER( iParam ); #endif - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4, regOut); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4, regOut); sqlite3VdbeChangeP4(v, -1, (char*)&statGetFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 1 + IsStat34); } @@ -89158,7 +90255,7 @@ static void analyzeOneTable( #endif sqlite3VdbeAddOp2(v, OP_Integer, nCol, regStat4+1); sqlite3VdbeAddOp2(v, OP_Integer, pIdx->nKeyCol, regStat4+2); - sqlite3VdbeAddOp3(v, OP_Function, 0, regStat4+1, regStat4); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regStat4+1, regStat4); sqlite3VdbeChangeP4(v, -1, (char*)&statInitFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); @@ -89254,7 +90351,7 @@ static void analyzeOneTable( } #endif assert( regChng==(regStat4+1) ); - sqlite3VdbeAddOp3(v, OP_Function, 1, regStat4, regTemp); + sqlite3VdbeAddOp3(v, OP_Function0, 1, regStat4, regTemp); sqlite3VdbeChangeP4(v, -1, (char*)&statPushFuncdef, P4_FUNCDEF); sqlite3VdbeChangeP5(v, 2+IsStat34); sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, addrNextRow); VdbeCoverage(v); @@ -89966,6 +91063,7 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){ ************************************************************************* ** This file contains code used to implement the ATTACH and DETACH commands. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_ATTACH /* @@ -90313,7 +91411,7 @@ static void codeAttach( assert( v || db->mallocFailed ); if( v ){ - sqlite3VdbeAddOp3(v, OP_Function, 0, regArgs+3-pFunc->nArg, regArgs+3); + sqlite3VdbeAddOp3(v, OP_Function0, 0, regArgs+3-pFunc->nArg, regArgs+3); assert( pFunc->nArg==-1 || (pFunc->nArg&0xff)==pFunc->nArg ); sqlite3VdbeChangeP5(v, (u8)(pFunc->nArg)); sqlite3VdbeChangeP4(v, -1, (char *)pFunc, P4_FUNCDEF); @@ -90555,6 +91653,7 @@ SQLITE_PRIVATE int sqlite3FixTriggerStep( ** systems that do not need this facility may omit it by recompiling ** the library with -DSQLITE_OMIT_AUTHORIZATION=1 */ +/* #include "sqliteInt.h" */ /* ** All of the code in this file may be omitted by defining a single @@ -90825,6 +91924,7 @@ SQLITE_PRIVATE void sqlite3AuthContextPop(AuthContext *pContext){ ** COMMIT ** ROLLBACK */ +/* #include "sqliteInt.h" */ /* ** This routine is called when a new SQL statement is beginning to @@ -91778,7 +92878,7 @@ SQLITE_PRIVATE void sqlite3StartTable( int j1; int fileFormat; int reg1, reg2, reg3; - sqlite3BeginWriteOperation(pParse, 0, iDb); + sqlite3BeginWriteOperation(pParse, 1, iDb); #ifndef SQLITE_OMIT_VIRTUALTABLE if( isVirtual ){ @@ -91894,10 +92994,10 @@ SQLITE_PRIVATE void sqlite3AddColumn(Parse *pParse, Token *pName){ pCol->zName = z; /* If there is no type specified, columns have the default affinity - ** 'NONE'. If there is a type specified, then sqlite3AddColumnType() will + ** 'BLOB'. If there is a type specified, then sqlite3AddColumnType() will ** be called next to set pCol->affinity correctly. */ - pCol->affinity = SQLITE_AFF_NONE; + pCol->affinity = SQLITE_AFF_BLOB; pCol->szEst = 1; p->nCol++; } @@ -91932,7 +93032,7 @@ SQLITE_PRIVATE void sqlite3AddNotNull(Parse *pParse, int onError){ ** 'CHAR' | SQLITE_AFF_TEXT ** 'CLOB' | SQLITE_AFF_TEXT ** 'TEXT' | SQLITE_AFF_TEXT -** 'BLOB' | SQLITE_AFF_NONE +** 'BLOB' | SQLITE_AFF_BLOB ** 'REAL' | SQLITE_AFF_REAL ** 'FLOA' | SQLITE_AFF_REAL ** 'DOUB' | SQLITE_AFF_REAL @@ -91958,7 +93058,7 @@ SQLITE_PRIVATE char sqlite3AffinityType(const char *zIn, u8 *pszEst){ aff = SQLITE_AFF_TEXT; }else if( h==(('b'<<24)+('l'<<16)+('o'<<8)+'b') /* BLOB */ && (aff==SQLITE_AFF_NUMERIC || aff==SQLITE_AFF_REAL) ){ - aff = SQLITE_AFF_NONE; + aff = SQLITE_AFF_BLOB; if( zIn[0]=='(' ) zChar = zIn; #ifndef SQLITE_OMIT_FLOATING_POINT }else if( h==(('r'<<24)+('e'<<16)+('a'<<8)+'l') /* REAL */ @@ -92127,14 +93227,11 @@ SQLITE_PRIVATE void sqlite3AddPrimaryKey( "INTEGER PRIMARY KEY"); #endif }else{ - Vdbe *v = pParse->pVdbe; Index *p; - if( v ) pParse->addrSkipPK = sqlite3VdbeAddOp0(v, OP_Noop); p = sqlite3CreateIndex(pParse, 0, 0, 0, pList, onError, 0, 0, sortOrder, 0); if( p ){ p->idxType = SQLITE_IDXTYPE_PRIMARYKEY; - if( v ) sqlite3VdbeJumpHere(v, pParse->addrSkipPK); } pList = 0; } @@ -92353,7 +93450,7 @@ static char *createTableStmt(sqlite3 *db, Table *p){ zStmt[k++] = '('; for(pCol=p->aCol, i=0; i<p->nCol; i++, pCol++){ static const char * const azType[] = { - /* SQLITE_AFF_NONE */ "", + /* SQLITE_AFF_BLOB */ "", /* SQLITE_AFF_TEXT */ " TEXT", /* SQLITE_AFF_NUMERIC */ " NUM", /* SQLITE_AFF_INTEGER */ " INT", @@ -92366,17 +93463,17 @@ static char *createTableStmt(sqlite3 *db, Table *p){ k += sqlite3Strlen30(&zStmt[k]); zSep = zSep2; identPut(zStmt, &k, pCol->zName); - assert( pCol->affinity-SQLITE_AFF_NONE >= 0 ); - assert( pCol->affinity-SQLITE_AFF_NONE < ArraySize(azType) ); - testcase( pCol->affinity==SQLITE_AFF_NONE ); + assert( pCol->affinity-SQLITE_AFF_BLOB >= 0 ); + assert( pCol->affinity-SQLITE_AFF_BLOB < ArraySize(azType) ); + testcase( pCol->affinity==SQLITE_AFF_BLOB ); testcase( pCol->affinity==SQLITE_AFF_TEXT ); testcase( pCol->affinity==SQLITE_AFF_NUMERIC ); testcase( pCol->affinity==SQLITE_AFF_INTEGER ); testcase( pCol->affinity==SQLITE_AFF_REAL ); - zType = azType[pCol->affinity - SQLITE_AFF_NONE]; + zType = azType[pCol->affinity - SQLITE_AFF_BLOB]; len = sqlite3Strlen30(zType); - assert( pCol->affinity==SQLITE_AFF_NONE + assert( pCol->affinity==SQLITE_AFF_BLOB || pCol->affinity==sqlite3AffinityType(zType, 0) ); memcpy(&zStmt[k], zType, len); k += len; @@ -92487,14 +93584,6 @@ static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){ sqlite3VdbeGetOp(v, pParse->addrCrTab)->opcode = OP_CreateIndex; } - /* Bypass the creation of the PRIMARY KEY btree and the sqlite_master - ** table entry. - */ - if( pParse->addrSkipPK ){ - assert( v ); - sqlite3VdbeGetOp(v, pParse->addrSkipPK)->opcode = OP_Goto; - } - /* Locate the PRIMARY KEY index. Or, if this table was originally ** an INTEGER PRIMARY KEY table, create a new PRIMARY KEY index. */ @@ -92512,6 +93601,16 @@ static void convertToWithoutRowidTable(Parse *pParse, Table *pTab){ pTab->iPKey = -1; }else{ pPk = sqlite3PrimaryKeyIndex(pTab); + + /* Bypass the creation of the PRIMARY KEY btree and the sqlite_master + ** table entry. This is only required if currently generating VDBE + ** code for a CREATE TABLE (not when parsing one as part of reading + ** a database schema). */ + if( v ){ + assert( db->init.busy==0 ); + sqlite3VdbeGetOp(v, pPk->tnum)->opcode = OP_Goto; + } + /* ** Remove all redundant columns from the PRIMARY KEY. For example, change ** "PRIMARY KEY(a,b,a,b,c,b,c,d)" into just "PRIMARY KEY(a,b,c,d)". Later @@ -92647,7 +93746,7 @@ SQLITE_PRIVATE void sqlite3EndTable( if( (p->tabFlags & TF_HasPrimaryKey)==0 ){ sqlite3ErrorMsg(pParse, "PRIMARY KEY missing on table %s", p->zName); }else{ - p->tabFlags |= TF_WithoutRowid; + p->tabFlags |= TF_WithoutRowid | TF_NoVisibleRowid; convertToWithoutRowidTable(pParse, p); } } @@ -92715,26 +93814,46 @@ SQLITE_PRIVATE void sqlite3EndTable( ** be redundant. */ if( pSelect ){ - SelectDest dest; - Table *pSelTab; - + SelectDest dest; /* Where the SELECT should store results */ + int regYield; /* Register holding co-routine entry-point */ + int addrTop; /* Top of the co-routine */ + int regRec; /* A record to be insert into the new table */ + int regRowid; /* Rowid of the next row to insert */ + int addrInsLoop; /* Top of the loop for inserting rows */ + Table *pSelTab; /* A table that describes the SELECT results */ + + regYield = ++pParse->nMem; + regRec = ++pParse->nMem; + regRowid = ++pParse->nMem; assert(pParse->nTab==1); + sqlite3MayAbort(pParse); sqlite3VdbeAddOp3(v, OP_OpenWrite, 1, pParse->regRoot, iDb); sqlite3VdbeChangeP5(v, OPFLAG_P2ISREG); pParse->nTab = 2; - sqlite3SelectDestInit(&dest, SRT_Table, 1); + addrTop = sqlite3VdbeCurrentAddr(v) + 1; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, addrTop); + sqlite3SelectDestInit(&dest, SRT_Coroutine, regYield); sqlite3Select(pParse, pSelect, &dest); + sqlite3VdbeAddOp1(v, OP_EndCoroutine, regYield); + sqlite3VdbeJumpHere(v, addrTop - 1); + if( pParse->nErr ) return; + pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect); + if( pSelTab==0 ) return; + assert( p->aCol==0 ); + p->nCol = pSelTab->nCol; + p->aCol = pSelTab->aCol; + pSelTab->nCol = 0; + pSelTab->aCol = 0; + sqlite3DeleteTable(db, pSelTab); + addrInsLoop = sqlite3VdbeAddOp1(v, OP_Yield, dest.iSDParm); + VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_MakeRecord, dest.iSdst, dest.nSdst, regRec); + sqlite3TableAffinity(v, p, 0); + sqlite3VdbeAddOp2(v, OP_NewRowid, 1, regRowid); + sqlite3VdbeAddOp3(v, OP_Insert, 1, regRec, regRowid); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrInsLoop); + sqlite3VdbeJumpHere(v, addrInsLoop); sqlite3VdbeAddOp1(v, OP_Close, 1); - if( pParse->nErr==0 ){ - pSelTab = sqlite3ResultSetOfSelect(pParse, pSelect); - if( pSelTab==0 ) return; - assert( p->aCol==0 ); - p->nCol = pSelTab->nCol; - p->aCol = pSelTab->aCol; - pSelTab->nCol = 0; - pSelTab->aCol = 0; - sqlite3DeleteTable(db, pSelTab); - } } /* Compute the complete text of the CREATE statement */ @@ -94033,10 +95152,15 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( v = sqlite3GetVdbe(pParse); if( v==0 ) goto exit_create_index; - - /* Create the rootpage for the index - */ sqlite3BeginWriteOperation(pParse, 1, iDb); + + /* Create the rootpage for the index using CreateIndex. But before + ** doing so, code a Noop instruction and store its address in + ** Index.tnum. This is required in case this index is actually a + ** PRIMARY KEY and the table is actually a WITHOUT ROWID table. In + ** that case the convertToWithoutRowidTable() routine will replace + ** the Noop with a Goto to jump over the VDBE code generated below. */ + pIndex->tnum = sqlite3VdbeAddOp0(v, OP_Noop); sqlite3VdbeAddOp2(v, OP_CreateIndex, iDb, iMem); /* Gather the complete text of the CREATE INDEX statement into @@ -94076,6 +95200,8 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex( sqlite3MPrintf(db, "name='%q' AND type='index'", pIndex->zName)); sqlite3VdbeAddOp1(v, OP_Expire, 0); } + + sqlite3VdbeJumpHere(v, pIndex->tnum); } /* When adding an index to the list of indices for a table, make @@ -94478,7 +95604,7 @@ SQLITE_PRIVATE void sqlite3SrcListDelete(sqlite3 *db, SrcList *pList){ sqlite3DbFree(db, pItem->zDatabase); sqlite3DbFree(db, pItem->zName); sqlite3DbFree(db, pItem->zAlias); - sqlite3DbFree(db, pItem->zIndex); + sqlite3DbFree(db, pItem->zIndexedBy); sqlite3DeleteTable(db, pItem->pTab); sqlite3SelectDelete(db, pItem->pSelect); sqlite3ExprDelete(db, pItem->pOn); @@ -94551,13 +95677,13 @@ SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pI assert( pIndexedBy!=0 ); if( p && ALWAYS(p->nSrc>0) ){ struct SrcList_item *pItem = &p->a[p->nSrc-1]; - assert( pItem->notIndexed==0 && pItem->zIndex==0 ); + assert( pItem->notIndexed==0 && pItem->zIndexedBy==0 ); if( pIndexedBy->n==1 && !pIndexedBy->z ){ /* A "NOT INDEXED" clause was supplied. See parse.y ** construct "indexed_opt" for details. */ pItem->notIndexed = 1; }else{ - pItem->zIndex = sqlite3NameFromToken(pParse->db, pIndexedBy); + pItem->zIndexedBy = sqlite3NameFromToken(pParse->db, pIndexedBy); } } } @@ -95119,6 +96245,7 @@ SQLITE_PRIVATE void sqlite3WithDelete(sqlite3 *db, With *pWith){ ** of user defined functions and collation sequences. */ +/* #include "sqliteInt.h" */ /* ** Invoke the 'collation needed' callback to request a collation sequence @@ -95596,6 +96723,7 @@ SQLITE_PRIVATE Schema *sqlite3SchemaGet(sqlite3 *db, Btree *pBt){ ** This file contains C code routines that are called by the parser ** in order to generate code for DELETE FROM statements. */ +/* #include "sqliteInt.h" */ /* ** While a SrcList can in general represent multiple tables and subqueries @@ -96381,8 +97509,8 @@ SQLITE_PRIVATE int sqlite3GenerateIndexKey( *piPartIdxLabel = sqlite3VdbeMakeLabel(v); pParse->iPartIdxTab = iDataCur; sqlite3ExprCachePush(pParse); - sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, - SQLITE_JUMPIFNULL); + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, *piPartIdxLabel, + SQLITE_JUMPIFNULL); }else{ *piPartIdxLabel = 0; } @@ -96438,8 +97566,10 @@ SQLITE_PRIVATE void sqlite3ResolvePartIdxLabel(Parse *pParse, int iLabel){ ** functions of SQLite. (Some function, and in particular the date and ** time functions, are implemented separately.) */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* #include <assert.h> */ +/* #include "vdbeInt.h" */ /* ** Return the collating function associated with a function. @@ -96998,17 +98128,15 @@ struct compareInfo { /* ** For LIKE and GLOB matching on EBCDIC machines, assume that every -** character is exactly one byte in size. Also, all characters are -** able to participate in upper-case-to-lower-case mappings in EBCDIC -** whereas only characters less than 0x80 do in ASCII. +** character is exactly one byte in size. Also, provde the Utf8Read() +** macro for fast reading of the next character in the common case where +** the next character is ASCII. */ #if defined(SQLITE_EBCDIC) # define sqlite3Utf8Read(A) (*((*A)++)) -# define GlobUpperToLower(A) A = sqlite3UpperToLower[A] -# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] +# define Utf8Read(A) (*(A++)) #else -# define GlobUpperToLower(A) if( A<=0x7f ){ A = sqlite3UpperToLower[A]; } -# define GlobUpperToLowerAscii(A) A = sqlite3UpperToLower[A] +# define Utf8Read(A) (A[0]<0x80?*(A++):sqlite3Utf8Read(&A)) #endif static const struct compareInfo globInfo = { '*', '?', '[', 0 }; @@ -97050,7 +98178,7 @@ static const struct compareInfo likeInfoAlt = { '%', '_', 0, 0 }; ** Ec Where E is the "esc" character and c is any other ** character, including '%', '_', and esc, match exactly c. ** -** The comments through this routine usually assume glob matching. +** The comments within this routine usually assume glob matching. ** ** This routine is usually quick, but can be N**2 in the worst case. */ @@ -97074,13 +98202,12 @@ static int patternCompare( */ matchOther = esc ? esc : pInfo->matchSet; - while( (c = sqlite3Utf8Read(&zPattern))!=0 ){ + while( (c = Utf8Read(zPattern))!=0 ){ if( c==matchAll ){ /* Match "*" */ /* Skip over multiple "*" characters in the pattern. If there ** are also "?" characters, skip those as well, but consume a ** single character of the input string for each "?" skipped */ - while( (c=sqlite3Utf8Read(&zPattern)) == matchAll - || c == matchOne ){ + while( (c=Utf8Read(zPattern)) == matchAll || c == matchOne ){ if( c==matchOne && sqlite3Utf8Read(&zString)==0 ){ return 0; } @@ -97125,7 +98252,7 @@ static int patternCompare( if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } }else{ - while( (c2 = sqlite3Utf8Read(&zString))!=0 ){ + while( (c2 = Utf8Read(zString))!=0 ){ if( c2!=c ) continue; if( patternCompare(zPattern,zString,pInfo,esc) ) return 1; } @@ -97171,7 +98298,7 @@ static int patternCompare( continue; } } - c2 = sqlite3Utf8Read(&zString); + c2 = Utf8Read(zString); if( c==c2 ) continue; if( noCase && c<0x80 && c2<0x80 && sqlite3Tolower(c)==sqlite3Tolower(c2) ){ continue; @@ -97548,16 +98675,14 @@ static void zeroblobFunc( sqlite3_value **argv ){ i64 n; - sqlite3 *db = sqlite3_context_db_handle(context); + int rc; assert( argc==1 ); UNUSED_PARAMETER(argc); n = sqlite3_value_int64(argv[0]); - testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH] ); - testcase( n==db->aLimit[SQLITE_LIMIT_LENGTH]+1 ); - if( n>db->aLimit[SQLITE_LIMIT_LENGTH] ){ - sqlite3_result_error_toobig(context); - }else{ - sqlite3_result_zeroblob(context, (int)n); /* IMP: R-00293-64994 */ + if( n<0 ) n = 0; + rc = sqlite3_result_zeroblob64(context, n); /* IMP: R-00293-64994 */ + if( rc ){ + sqlite3_result_error_code(context, rc); } } @@ -98238,6 +99363,7 @@ SQLITE_PRIVATE void sqlite3RegisterGlobalFunctions(void){ ** This file contains code used by the compiler to add foreign key ** support to compiled SQL statements. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_FOREIGN_KEY #ifndef SQLITE_OMIT_TRIGGER @@ -99642,6 +100768,7 @@ SQLITE_PRIVATE void sqlite3FkDelete(sqlite3 *db, Table *pTab){ ** This file contains C code routines that are called by the parser ** to handle INSERT statements in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Generate code that will @@ -99671,7 +100798,7 @@ SQLITE_PRIVATE void sqlite3OpenTable( }else{ Index *pPk = sqlite3PrimaryKeyIndex(pTab); assert( pPk!=0 ); - assert( pPk->tnum=pTab->tnum ); + assert( pPk->tnum==pTab->tnum ); sqlite3VdbeAddOp3(v, opcode, iCur, pPk->tnum, iDb); sqlite3VdbeSetP4KeyInfo(pParse, pPk); VdbeComment((v, "%s", pTab->zName)); @@ -99685,7 +100812,7 @@ SQLITE_PRIVATE void sqlite3OpenTable( ** ** Character Column affinity ** ------------------------------ -** 'A' NONE +** 'A' BLOB ** 'B' TEXT ** 'C' NUMERIC ** 'D' INTEGER @@ -99728,9 +100855,9 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *v, Index *pIdx){ /* ** Compute the affinity string for table pTab, if it has not already been -** computed. As an optimization, omit trailing SQLITE_AFF_NONE affinities. +** computed. As an optimization, omit trailing SQLITE_AFF_BLOB affinities. ** -** If the affinity exists (if it is no entirely SQLITE_AFF_NONE values) and +** If the affinity exists (if it is no entirely SQLITE_AFF_BLOB values) and ** if iReg>0 then code an OP_Affinity opcode that will set the affinities ** for register iReg and following. Or if affinities exists and iReg==0, ** then just set the P4 operand of the previous opcode (which should be @@ -99740,7 +100867,7 @@ SQLITE_PRIVATE const char *sqlite3IndexAffinityStr(Vdbe *v, Index *pIdx){ ** ** Character Column affinity ** ------------------------------ -** 'A' NONE +** 'A' BLOB ** 'B' TEXT ** 'C' NUMERIC ** 'D' INTEGER @@ -99762,7 +100889,7 @@ SQLITE_PRIVATE void sqlite3TableAffinity(Vdbe *v, Table *pTab, int iReg){ } do{ zColAff[i--] = 0; - }while( i>=0 && zColAff[i]==SQLITE_AFF_NONE ); + }while( i>=0 && zColAff[i]==SQLITE_AFF_BLOB ); pTab->zColAff = zColAff; } i = sqlite3Strlen30(zColAff); @@ -101010,8 +102137,8 @@ SQLITE_PRIVATE void sqlite3GenerateConstraintChecks( if( pIdx->pPartIdxWhere ){ sqlite3VdbeAddOp2(v, OP_Null, 0, aRegIdx[ix]); pParse->ckBase = regNewData+1; - sqlite3ExprIfFalse(pParse, pIdx->pPartIdxWhere, addrUniqueOk, - SQLITE_JUMPIFNULL); + sqlite3ExprIfFalseDup(pParse, pIdx->pPartIdxWhere, addrUniqueOk, + SQLITE_JUMPIFNULL); pParse->ckBase = 0; } @@ -101625,7 +102752,7 @@ static int xferOptimization( sqlite3TableLock(pParse, iDbSrc, pSrc->tnum, 0, pSrc->zName); } for(pDestIdx=pDest->pIndex; pDestIdx; pDestIdx=pDestIdx->pNext){ - u8 useSeekResult = 0; + u8 idxInsFlags = 0; for(pSrcIdx=pSrc->pIndex; ALWAYS(pSrcIdx); pSrcIdx=pSrcIdx->pNext){ if( xferCompatibleIndex(pDestIdx, pSrcIdx) ) break; } @@ -101660,12 +102787,15 @@ static int xferOptimization( if( sqlite3_stricmp("BINARY", zColl) ) break; } if( i==pSrcIdx->nColumn ){ - useSeekResult = OPFLAG_USESEEKRESULT; + idxInsFlags = OPFLAG_USESEEKRESULT; sqlite3VdbeAddOp3(v, OP_Last, iDest, 0, -1); } } + if( !HasRowid(pSrc) && pDestIdx->idxType==2 ){ + idxInsFlags |= OPFLAG_NCHANGE; + } sqlite3VdbeAddOp3(v, OP_IdxInsert, iDest, regData, 1); - sqlite3VdbeChangeP5(v, useSeekResult); + sqlite3VdbeChangeP5(v, idxInsFlags); sqlite3VdbeAddOp2(v, OP_Next, iSrc, addr1+1); VdbeCoverage(v); sqlite3VdbeJumpHere(v, addr1); sqlite3VdbeAddOp2(v, OP_Close, iSrc, 0); @@ -101704,6 +102834,7 @@ static int xferOptimization( ** accessed by users of the library. */ +/* #include "sqliteInt.h" */ /* ** Execute SQL code. Return one of the SQLITE_ success/failure @@ -101872,6 +103003,7 @@ exec_out: */ #ifndef _SQLITE3EXT_H_ #define _SQLITE3EXT_H_ +/* #include "sqlite3.h" */ typedef struct sqlite3_api_routines sqlite3_api_routines; @@ -102121,6 +103253,11 @@ struct sqlite3_api_routines { void (*result_text64)(sqlite3_context*,const char*,sqlite3_uint64, void(*)(void*), unsigned char); int (*strglob)(const char*,const char*); + /* Version 3.8.11 and later */ + sqlite3_value *(*value_dup)(const sqlite3_value*); + void (*value_free)(sqlite3_value*); + int (*result_zeroblob64)(sqlite3_context*,sqlite3_uint64); + int (*bind_zeroblob64)(sqlite3_stmt*, int, sqlite3_uint64); }; /* @@ -102351,6 +103488,11 @@ struct sqlite3_api_routines { #define sqlite3_result_blob64 sqlite3_api->result_blob64 #define sqlite3_result_text64 sqlite3_api->result_text64 #define sqlite3_strglob sqlite3_api->strglob +/* Version 3.8.11 and later */ +#define sqlite3_value_dup sqlite3_api->value_dup +#define sqlite3_value_free sqlite3_api->value_free +#define sqlite3_result_zeroblob64 sqlite3_api->result_zeroblob64 +#define sqlite3_bind_zeroblob64 sqlite3_api->bind_zeroblob64 #endif /* SQLITE_CORE */ #ifndef SQLITE_CORE @@ -102372,6 +103514,7 @@ struct sqlite3_api_routines { /************** End of sqlite3ext.h ******************************************/ /************** Continuing where we left off in loadext.c ********************/ +/* #include "sqliteInt.h" */ /* #include <string.h> */ #ifndef SQLITE_OMIT_LOAD_EXTENSION @@ -102756,7 +103899,12 @@ static const sqlite3_api_routines sqlite3Apis = { sqlite3_reset_auto_extension, sqlite3_result_blob64, sqlite3_result_text64, - sqlite3_strglob + sqlite3_strglob, + /* Version 3.8.11 and later */ + (sqlite3_value*(*)(const sqlite3_value*))sqlite3_value_dup, + sqlite3_value_free, + sqlite3_result_zeroblob64, + sqlite3_bind_zeroblob64 }; /* @@ -103138,6 +104286,7 @@ SQLITE_PRIVATE void sqlite3AutoLoadExtensions(sqlite3 *db){ ************************************************************************* ** This file contains code used to implement the PRAGMA command. */ +/* #include "sqliteInt.h" */ #if !defined(SQLITE_ENABLE_LOCKING_STYLE) # if defined(__APPLE__) @@ -103244,7 +104393,7 @@ static const struct sPragmaNames { #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) { /* zName: */ "cache_size", /* ePragTyp: */ PragTyp_CACHE_SIZE, - /* ePragFlag: */ PragFlag_NeedSchema, + /* ePragFlag: */ 0, /* iArg: */ 0 }, #endif #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) @@ -103257,6 +104406,10 @@ static const struct sPragmaNames { /* ePragTyp: */ PragTyp_CASE_SENSITIVE_LIKE, /* ePragFlag: */ 0, /* iArg: */ 0 }, + { /* zName: */ "cell_size_check", + /* ePragTyp: */ PragTyp_FLAG, + /* ePragFlag: */ 0, + /* iArg: */ SQLITE_CellSizeCk }, #if !defined(SQLITE_OMIT_FLAG_PRAGMAS) { /* zName: */ "checkpoint_fullfsync", /* ePragTyp: */ PragTyp_FLAG, @@ -103614,7 +104767,7 @@ static const struct sPragmaNames { /* iArg: */ SQLITE_WriteSchema|SQLITE_RecoveryMode }, #endif }; -/* Number of pragmas: 59 on by default, 72 total. */ +/* Number of pragmas: 60 on by default, 73 total. */ /************** End of pragma.h **********************************************/ /************** Continuing where we left off in pragma.c *********************/ @@ -104310,11 +105463,13 @@ SQLITE_PRIVATE void sqlite3Pragma( case PragTyp_CACHE_SIZE: { assert( sqlite3SchemaMutexHeld(db, iDb, 0) ); if( !zRight ){ + if( sqlite3ReadSchema(pParse) ) goto pragma_out; returnSingleInt(pParse, "cache_size", pDb->pSchema->cache_size); }else{ int size = sqlite3Atoi(zRight); pDb->pSchema->cache_size = size; sqlite3BtreeSetCacheSize(pDb->pBt, pDb->pSchema->cache_size); + if( sqlite3ReadSchema(pParse) ) goto pragma_out; } break; } @@ -105585,6 +106740,7 @@ pragma_out: ** interface, and routines that contribute to loading the database schema ** from disk. */ +/* #include "sqliteInt.h" */ /* ** Fill the InitData structure with an error message that indicates @@ -105597,13 +106753,13 @@ static void corruptSchema( ){ sqlite3 *db = pData->db; if( !db->mallocFailed && (db->flags & SQLITE_RecoveryMode)==0 ){ + char *z; if( zObj==0 ) zObj = "?"; - sqlite3SetString(pData->pzErrMsg, db, - "malformed database schema (%s)", zObj); - if( zExtra ){ - *pData->pzErrMsg = sqlite3MAppendf(db, *pData->pzErrMsg, - "%s - %s", *pData->pzErrMsg, zExtra); - } + z = sqlite3_mprintf("malformed database schema (%s)", zObj); + if( z && zExtra ) z = sqlite3_mprintf("%z - %s", z, zExtra); + sqlite3DbFree(db, *pData->pzErrMsg); + *pData->pzErrMsg = z; + if( z==0 ) db->mallocFailed = 1; } pData->rc = db->mallocFailed ? SQLITE_NOMEM : SQLITE_CORRUPT_BKPT; } @@ -105795,7 +106951,7 @@ static int sqlite3InitOne(sqlite3 *db, int iDb, char **pzErrMsg){ if( !sqlite3BtreeIsInReadTrans(pDb->pBt) ){ rc = sqlite3BtreeBeginTrans(pDb->pBt, 0); if( rc!=SQLITE_OK ){ - sqlite3SetString(pzErrMsg, db, "%s", sqlite3ErrStr(rc)); + sqlite3SetString(pzErrMsg, db, sqlite3ErrStr(rc)); goto initone_error_out; } openedTransaction = 1; @@ -106479,6 +107635,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( ** This file contains C code routines that are called by the parser ** to handle SELECT statements in SQLite. */ +/* #include "sqliteInt.h" */ /* ** Trace output macros @@ -106487,7 +107644,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_prepare16_v2( /***/ int sqlite3SelectTrace = 0; # define SELECTTRACE(K,P,S,X) \ if(sqlite3SelectTrace&(K)) \ - sqlite3DebugPrintf("%*s%s.%p: ",(P)->nSelectIndent*2-2,"",(S)->zSelName,(S)),\ + sqlite3DebugPrintf("%*s%s.%p: ",(P)->nSelectIndent*2-2,"",\ + (S)->zSelName,(S)),\ sqlite3DebugPrintf X #else # define SELECTTRACE(K,P,S,X) @@ -106831,6 +107989,12 @@ static void setJoinExpr(Expr *p, int iTable){ assert( !ExprHasProperty(p, EP_TokenOnly|EP_Reduced) ); ExprSetVVAProperty(p, EP_NoReduce); p->iRightJoinTable = (i16)iTable; + if( p->op==TK_FUNCTION && p->x.pList ){ + int i; + for(i=0; i<p->x.pList->nExpr; i++){ + setJoinExpr(p->x.pList->a[i].pExpr, iTable); + } + } setJoinExpr(p->pLeft, iTable); p = p->pRight; } @@ -107179,8 +108343,13 @@ static void selectInnerLoop( /* If the destination is an EXISTS(...) expression, the actual ** values returned by the SELECT are not required. */ - sqlite3ExprCodeExprList(pParse, pEList, regResult, - (eDest==SRT_Output||eDest==SRT_Coroutine)?SQLITE_ECEL_DUP:0); + u8 ecelFlags; + if( eDest==SRT_Mem || eDest==SRT_Output || eDest==SRT_Coroutine ){ + ecelFlags = SQLITE_ECEL_DUP; + }else{ + ecelFlags = 0; + } + sqlite3ExprCodeExprList(pParse, pEList, regResult, ecelFlags); } /* If the DISTINCT keyword was present on the SELECT statement @@ -107235,7 +108404,8 @@ static void selectInnerLoop( default: { assert( pDistinct->eTnctType==WHERE_DISTINCT_UNORDERED ); - codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol, regResult); + codeDistinct(pParse, pDistinct->tabTnct, iContinue, nResultCol, + regResult); break; } } @@ -107277,6 +108447,8 @@ static void selectInnerLoop( int r1 = sqlite3GetTempRange(pParse, nPrefixReg+1); testcase( eDest==SRT_Table ); testcase( eDest==SRT_EphemTab ); + testcase( eDest==SRT_Fifo ); + testcase( eDest==SRT_DistFifo ); sqlite3VdbeAddOp3(v, OP_MakeRecord, regResult, nResultCol, r1+nPrefixReg); #ifndef SQLITE_OMIT_CTE if( eDest==SRT_DistFifo ){ @@ -107286,7 +108458,8 @@ static void selectInnerLoop( ** current row to the index and proceed with writing it to the ** output table as well. */ int addr = sqlite3VdbeCurrentAddr(v) + 4; - sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); VdbeCoverage(v); + sqlite3VdbeAddOp4Int(v, OP_Found, iParm+1, addr, r1, 0); + VdbeCoverage(v); sqlite3VdbeAddOp2(v, OP_IdxInsert, iParm+1, r1); assert( pSort==0 ); } @@ -107532,7 +108705,6 @@ static KeyInfo *keyInfoFromExprList( return pInfo; } -#ifndef SQLITE_OMIT_COMPOUND_SELECT /* ** Name of the connection operator, used for error messages. */ @@ -107546,7 +108718,6 @@ static const char *selectOpName(int id){ } return z; } -#endif /* SQLITE_OMIT_COMPOUND_SELECT */ #ifndef SQLITE_OMIT_EXPLAIN /* @@ -107692,10 +108863,7 @@ static void generateSortTail( VdbeComment((v, "%s", aOutEx[i].zName ? aOutEx[i].zName : aOutEx[i].zSpan)); } switch( eDest ){ - case SRT_Table: case SRT_EphemTab: { - testcase( eDest==SRT_Table ); - testcase( eDest==SRT_EphemTab ); sqlite3VdbeAddOp2(v, OP_NewRowid, iParm, regRowid); sqlite3VdbeAddOp3(v, OP_Insert, iParm, regRow, regRowid); sqlite3VdbeChangeP5(v, OPFLAG_APPEND); @@ -107772,28 +108940,27 @@ static void generateSortTail( */ #ifdef SQLITE_ENABLE_COLUMN_METADATA # define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,C,D,E,F) +#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */ +# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F) +#endif static const char *columnTypeImpl( NameContext *pNC, Expr *pExpr, +#ifdef SQLITE_ENABLE_COLUMN_METADATA const char **pzOrigDb, const char **pzOrigTab, const char **pzOrigCol, +#endif u8 *pEstWidth ){ - char const *zOrigDb = 0; - char const *zOrigTab = 0; - char const *zOrigCol = 0; -#else /* if !defined(SQLITE_ENABLE_COLUMN_METADATA) */ -# define columnType(A,B,C,D,E,F) columnTypeImpl(A,B,F) -static const char *columnTypeImpl( - NameContext *pNC, - Expr *pExpr, - u8 *pEstWidth -){ -#endif /* !defined(SQLITE_ENABLE_COLUMN_METADATA) */ char const *zType = 0; int j; u8 estWidth = 1; +#ifdef SQLITE_ENABLE_COLUMN_METADATA + char const *zOrigDb = 0; + char const *zOrigTab = 0; + char const *zOrigCol = 0; +#endif if( NEVER(pExpr==0) || pNC->pSrcList==0 ) return 0; switch( pExpr->op ){ @@ -107846,10 +109013,13 @@ static const char *columnTypeImpl( ** of the SELECT statement. Return the declaration type and origin ** data for the result-set column of the sub-select. */ - if( iCol>=0 && iCol<pS->pEList->nExpr ){ + if( iCol>=0 && ALWAYS(iCol<pS->pEList->nExpr) ){ /* If iCol is less than zero, then the expression requests the ** rowid of the sub-select or view. This expression is legal (see ** test case misc2.2.2) - it always evaluates to NULL. + ** + ** The ALWAYS() is because iCol>=pS->pEList->nExpr will have been + ** caught already by name resolution. */ NameContext sNC; Expr *p = pS->pEList->a[iCol].pExpr; @@ -108167,11 +109337,12 @@ static void selectAddColumnTypeAndCollation( for(i=0, pCol=pTab->aCol; i<pTab->nCol; i++, pCol++){ p = a[i].pExpr; if( pCol->zType==0 ){ - pCol->zType = sqlite3DbStrDup(db, columnType(&sNC, p,0,0,0, &pCol->szEst)); + pCol->zType = sqlite3DbStrDup(db, + columnType(&sNC, p,0,0,0, &pCol->szEst)); } szAll += pCol->szEst; pCol->affinity = sqlite3ExprAffinity(p); - if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_NONE; + if( pCol->affinity==0 ) pCol->affinity = SQLITE_AFF_BLOB; pColl = sqlite3ExprCollSeq(pParse, p); if( pColl && pCol->zColl==0 ){ pCol->zColl = sqlite3DbStrDup(db, pColl->zName); @@ -108327,7 +109498,10 @@ static CollSeq *multiSelectCollSeq(Parse *pParse, Select *p, int iCol){ pRet = 0; } assert( iCol>=0 ); - if( pRet==0 && iCol<p->pEList->nExpr ){ + /* iCol must be less than p->pEList->nExpr. Otherwise an error would + ** have been thrown during name resolution and we would not have gotten + ** this far */ + if( pRet==0 && ALWAYS(iCol<p->pEList->nExpr) ){ pRet = sqlite3ExprCollSeq(pParse, p->pEList->a[iCol].pExpr); } return pRet; @@ -108517,10 +109691,14 @@ static void generateWithRecursiveQuery( /* Execute the recursive SELECT taking the single row in Current as ** the value for the recursive-table. Store the results in the Queue. */ - p->pPrior = 0; - sqlite3Select(pParse, p, &destQueue); - assert( p->pPrior==0 ); - p->pPrior = pSetup; + if( p->selFlags & SF_Aggregate ){ + sqlite3ErrorMsg(pParse, "recursive aggregate queries not supported"); + }else{ + p->pPrior = 0; + sqlite3Select(pParse, p, &destQueue); + assert( p->pPrior==0 ); + p->pPrior = pSetup; + } /* Keep running the loop until the Queue is empty */ sqlite3VdbeAddOp2(v, OP_Goto, 0, addrTop); @@ -108542,19 +109720,6 @@ static int multiSelectOrderBy( SelectDest *pDest /* What to do with query results */ ); -/* -** Error message for when two or more terms of a compound select have different -** size result sets. -*/ -static void selectWrongNumTermsError(Parse *pParse, Select *p){ - if( p->selFlags & SF_Values ){ - sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms"); - }else{ - sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s" - " do not have the same number of result columns", selectOpName(p->op)); - } -} - /* ** Handle the special case of a compound-select that originates from a ** VALUES clause. By handling this as a special case, we avoid deep @@ -108572,7 +109737,6 @@ static int multiSelectValues( SelectDest *pDest /* What to do with query results */ ){ Select *pPrior; - int nExpr = p->pEList->nExpr; int nRow = 1; int rc = 0; assert( p->selFlags & SF_MultiValue ); @@ -108581,10 +109745,7 @@ static int multiSelectValues( assert( p->op==TK_ALL || (p->op==TK_SELECT && p->pPrior==0) ); assert( p->pLimit==0 ); assert( p->pOffset==0 ); - if( p->pEList->nExpr!=nExpr ){ - selectWrongNumTermsError(pParse, p); - return 1; - } + assert( p->pNext==0 || p->pEList->nExpr==p->pNext->pEList->nExpr ); if( p->pPrior==0 ) break; assert( p->pPrior->pNext==p ); p = p->pPrior; @@ -108693,11 +109854,7 @@ static int multiSelect( ** in their result sets. */ assert( p->pEList && pPrior->pEList ); - if( p->pEList->nExpr!=pPrior->pEList->nExpr ){ - selectWrongNumTermsError(pParse, p); - rc = 1; - goto multi_select_end; - } + assert( p->pEList->nExpr==pPrior->pEList->nExpr ); #ifndef SQLITE_OMIT_CTE if( p->selFlags & SF_Recursive ){ @@ -108989,6 +110146,19 @@ multi_select_end: } #endif /* SQLITE_OMIT_COMPOUND_SELECT */ +/* +** Error message for when two or more terms of a compound select have different +** size result sets. +*/ +SQLITE_PRIVATE void sqlite3SelectWrongNumTermsError(Parse *pParse, Select *p){ + if( p->selFlags & SF_Values ){ + sqlite3ErrorMsg(pParse, "all VALUES must have the same number of terms"); + }else{ + sqlite3ErrorMsg(pParse, "SELECTs to the left and right of %s" + " do not have the same number of result columns", selectOpName(p->op)); + } +} + /* ** Code an output subroutine for a coroutine implementation of a ** SELECT statment. @@ -109044,15 +110214,14 @@ static int generateOutputSubroutine( */ codeOffset(v, p->iOffset, iContinue); + assert( pDest->eDest!=SRT_Exists ); + assert( pDest->eDest!=SRT_Table ); switch( pDest->eDest ){ /* Store the result as data using a unique key. */ - case SRT_Table: case SRT_EphemTab: { int r1 = sqlite3GetTempReg(pParse); int r2 = sqlite3GetTempReg(pParse); - testcase( pDest->eDest==SRT_Table ); - testcase( pDest->eDest==SRT_EphemTab ); sqlite3VdbeAddOp3(v, OP_MakeRecord, pIn->iSdst, pIn->nSdst, r1); sqlite3VdbeAddOp2(v, OP_NewRowid, pDest->iSDParm, r2); sqlite3VdbeAddOp3(v, OP_Insert, pDest->iSDParm, r1, r2); @@ -109080,16 +110249,6 @@ static int generateOutputSubroutine( break; } -#if 0 /* Never occurs on an ORDER BY query */ - /* If any row exist in the result set, record that fact and abort. - */ - case SRT_Exists: { - sqlite3VdbeAddOp2(v, OP_Integer, 1, pDest->iSDParm); - /* The LIMIT clause will terminate the loop for us */ - break; - } -#endif - /* If this is a scalar select that is part of an expression, then ** store the results in the appropriate memory cell and break out ** of the scan loop. @@ -109327,9 +110486,7 @@ static int multiSelectOrderBy( struct ExprList_item *pItem; for(i=0, pItem=pOrderBy->a; i<nOrderBy; i++, pItem++){ assert( pItem->u.x.iOrderByCol>0 ); - /* assert( pItem->u.x.iOrderByCol<=p->pEList->nExpr ) is also true - ** but only for well-formed SELECT statements. */ - testcase( pItem->u.x.iOrderByCol > p->pEList->nExpr ); + assert( pItem->u.x.iOrderByCol<=p->pEList->nExpr ); aPermute[i] = pItem->u.x.iOrderByCol - 1; } pKeyMerge = multiSelectOrderByKeyInfo(pParse, p, 1); @@ -109688,8 +110845,8 @@ static void substSelect( ** ** (**) Restriction (10) was removed from the code on 2005-02-05 but we ** accidently carried the comment forward until 2014-09-15. Original -** text: "The subquery does not use aggregates or the outer query does not -** use LIMIT." +** text: "The subquery does not use aggregates or the outer query +** does not use LIMIT." ** ** (11) The subquery and the outer query do not both have ORDER BY clauses. ** @@ -109899,10 +111056,10 @@ static int flattenSubquery( testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ); testcase( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))==SF_Aggregate ); assert( pSub->pSrc!=0 ); + assert( pSub->pEList->nExpr==pSub1->pEList->nExpr ); if( (pSub1->selFlags & (SF_Distinct|SF_Aggregate))!=0 || (pSub1->pPrior && pSub1->op!=TK_ALL) || pSub1->pSrc->nSrc<1 - || pSub->pEList->nExpr!=pSub1->pEList->nExpr ){ return 0; } @@ -110182,7 +111339,7 @@ static int flattenSubquery( #if SELECTTRACE_ENABLED if( sqlite3SelectTrace & 0x100 ){ - sqlite3DebugPrintf("After flattening:\n"); + SELECTTRACE(0x100,pParse,p,("After flattening:\n")); sqlite3TreeViewSelect(0, p, 0); } #endif @@ -110191,6 +111348,73 @@ static int flattenSubquery( } #endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + + +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) +/* +** Make copies of relevant WHERE clause terms of the outer query into +** the WHERE clause of subquery. Example: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1) WHERE x=5 AND y=10; +** +** Transformed into: +** +** SELECT * FROM (SELECT a AS x, c-d AS y FROM t1 WHERE a=5 AND c-d=10) +** WHERE x=5 AND y=10; +** +** The hope is that the terms added to the inner query will make it more +** efficient. +** +** Do not attempt this optimization if: +** +** (1) The inner query is an aggregate. (In that case, we'd really want +** to copy the outer WHERE-clause terms onto the HAVING clause of the +** inner query. But they probably won't help there so do not bother.) +** +** (2) The inner query is the recursive part of a common table expression. +** +** (3) The inner query has a LIMIT clause (since the changes to the WHERE +** close would change the meaning of the LIMIT). +** +** (4) The inner query is the right operand of a LEFT JOIN. (The caller +** enforces this restriction since this routine does not have enough +** information to know.) +** +** Return 0 if no changes are made and non-zero if one or more WHERE clause +** terms are duplicated into the subquery. +*/ +static int pushDownWhereTerms( + sqlite3 *db, /* The database connection (for malloc()) */ + Select *pSubq, /* The subquery whose WHERE clause is to be augmented */ + Expr *pWhere, /* The WHERE clause of the outer query */ + int iCursor /* Cursor number of the subquery */ +){ + Expr *pNew; + int nChng = 0; + if( pWhere==0 ) return 0; + if( (pSubq->selFlags & (SF_Aggregate|SF_Recursive))!=0 ){ + return 0; /* restrictions (1) and (2) */ + } + if( pSubq->pLimit!=0 ){ + return 0; /* restriction (3) */ + } + while( pWhere->op==TK_AND ){ + nChng += pushDownWhereTerms(db, pSubq, pWhere->pRight, iCursor); + pWhere = pWhere->pLeft; + } + if( sqlite3ExprIsTableConstant(pWhere, iCursor) ){ + nChng++; + while( pSubq ){ + pNew = sqlite3ExprDup(db, pWhere, 0); + pNew = substExpr(db, pNew, iCursor, pSubq->pEList); + pSubq->pWhere = sqlite3ExprAnd(db, pSubq->pWhere, pNew); + pSubq = pSubq->pPrior; + } + } + return nChng; +} +#endif /* !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) */ + /* ** Based on the contents of the AggInfo structure indicated by the first ** argument, this function checks if the following are true: @@ -110274,16 +111498,16 @@ static Table *isSimpleCount(Select *p, AggInfo *pAggInfo){ ** pFrom->pIndex and return SQLITE_OK. */ SQLITE_PRIVATE int sqlite3IndexedByLookup(Parse *pParse, struct SrcList_item *pFrom){ - if( pFrom->pTab && pFrom->zIndex ){ + if( pFrom->pTab && pFrom->zIndexedBy ){ Table *pTab = pFrom->pTab; - char *zIndex = pFrom->zIndex; + char *zIndexedBy = pFrom->zIndexedBy; Index *pIdx; for(pIdx=pTab->pIndex; - pIdx && sqlite3StrICmp(pIdx->zName, zIndex); + pIdx && sqlite3StrICmp(pIdx->zName, zIndexedBy); pIdx=pIdx->pNext ); if( !pIdx ){ - sqlite3ErrorMsg(pParse, "no such index: %s", zIndex, 0); + sqlite3ErrorMsg(pParse, "no such index: %s", zIndexedBy, 0); pParse->checkSchema = 1; return SQLITE_ERROR; } @@ -110464,7 +111688,7 @@ static int withExpand( pTab->zName = sqlite3DbStrDup(db, pCte->zName); pTab->iPKey = -1; pTab->nRowLogEst = 200; assert( 200==sqlite3LogEst(1048576) ); - pTab->tabFlags |= TF_Ephemeral; + pTab->tabFlags |= TF_Ephemeral | TF_NoVisibleRowid; pFrom->pSelect = sqlite3SelectDup(db, pCte->pSelect, 0); if( db->mallocFailed ) return SQLITE_NOMEM; assert( pFrom->pSelect ); @@ -110709,13 +111933,6 @@ static int selectExpander(Walker *pWalker, Select *p){ int longNames = (flags & SQLITE_FullColNames)!=0 && (flags & SQLITE_ShortColNames)==0; - /* When processing FROM-clause subqueries, it is always the case - ** that full_column_names=OFF and short_column_names=ON. The - ** sqlite3ResultSetOfSelect() routine makes it so. */ - assert( (p->selFlags & SF_NestedFrom)==0 - || ((flags & SQLITE_FullColNames)==0 && - (flags & SQLITE_ShortColNames)!=0) ); - for(k=0; k<pEList->nExpr; k++){ pE = a[k].pExpr; pRight = pE->pRight; @@ -111094,7 +112311,7 @@ static void updateAccumulator(Parse *pParse, AggInfo *pAggInfo){ if( regHit==0 && pAggInfo->nAccumulator ) regHit = ++pParse->nMem; sqlite3VdbeAddOp4(v, OP_CollSeq, regHit, 0, 0, (char *)pColl, P4_COLLSEQ); } - sqlite3VdbeAddOp4(v, OP_AggStep, 0, regAgg, pF->iMem, + sqlite3VdbeAddOp4(v, OP_AggStep0, 0, regAgg, pF->iMem, (void*)pF->pFunc, P4_FUNCDEF); sqlite3VdbeChangeP5(v, (u8)nArg); sqlite3ExprCacheAffinityChange(pParse, regAgg, nArg); @@ -111177,7 +112394,7 @@ SQLITE_PRIVATE int sqlite3Select( WhereInfo *pWInfo; /* Return from sqlite3WhereBegin() */ Vdbe *v; /* The virtual machine under construction */ int isAgg; /* True for select lists like "count(*)" */ - ExprList *pEList; /* List of columns to extract. */ + ExprList *pEList = 0; /* List of columns to extract. */ SrcList *pTabList; /* List of tables to select from */ Expr *pWhere; /* The WHERE clause. May be NULL */ ExprList *pGroupBy; /* The GROUP BY clause. May be NULL */ @@ -111227,12 +112444,11 @@ SQLITE_PRIVATE int sqlite3Select( memset(&sSort, 0, sizeof(sSort)); sSort.pOrderBy = p->pOrderBy; pTabList = p->pSrc; - pEList = p->pEList; if( pParse->nErr || db->mallocFailed ){ goto select_end; } + assert( p->pEList!=0 ); isAgg = (p->selFlags & SF_Aggregate)!=0; - assert( pEList!=0 ); #if SELECTTRACE_ENABLED if( sqlite3SelectTrace & 0x100 ){ SELECTTRACE(0x100,pParse,p, ("after name resolution:\n")); @@ -111241,29 +112457,67 @@ SQLITE_PRIVATE int sqlite3Select( #endif - /* Begin generating code. - */ - v = sqlite3GetVdbe(pParse); - if( v==0 ) goto select_end; - /* If writing to memory or generating a set ** only a single column may be output. */ #ifndef SQLITE_OMIT_SUBQUERY - if( checkForMultiColumnSelectError(pParse, pDest, pEList->nExpr) ){ + if( checkForMultiColumnSelectError(pParse, pDest, p->pEList->nExpr) ){ goto select_end; } #endif - /* Generate code for all sub-queries in the FROM clause + /* Try to flatten subqueries in the FROM clause up into the main query */ #if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) for(i=0; !p->pPrior && i<pTabList->nSrc; i++){ struct SrcList_item *pItem = &pTabList->a[i]; - SelectDest dest; Select *pSub = pItem->pSelect; int isAggSub; + if( pSub==0 ) continue; + isAggSub = (pSub->selFlags & SF_Aggregate)!=0; + if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){ + /* This subquery can be absorbed into its parent. */ + if( isAggSub ){ + isAgg = 1; + p->selFlags |= SF_Aggregate; + } + i = -1; + } + pTabList = p->pSrc; + if( db->mallocFailed ) goto select_end; + if( !IgnorableOrderby(pDest) ){ + sSort.pOrderBy = p->pOrderBy; + } + } +#endif + + /* Get a pointer the VDBE under construction, allocating a new VDBE if one + ** does not already exist */ + v = sqlite3GetVdbe(pParse); + if( v==0 ) goto select_end; +#ifndef SQLITE_OMIT_COMPOUND_SELECT + /* Handle compound SELECT statements using the separate multiSelect() + ** procedure. + */ + if( p->pPrior ){ + rc = multiSelect(pParse, p, pDest); + explainSetInteger(pParse->iSelectId, iRestoreSelectId); +#if SELECTTRACE_ENABLED + SELECTTRACE(1,pParse,p,("end compound-select processing\n")); + pParse->nSelectIndent--; +#endif + return rc; + } +#endif + + /* Generate code for all sub-queries in the FROM clause + */ +#if !defined(SQLITE_OMIT_SUBQUERY) || !defined(SQLITE_OMIT_VIEW) + for(i=0; i<pTabList->nSrc; i++){ + struct SrcList_item *pItem = &pTabList->a[i]; + SelectDest dest; + Select *pSub = pItem->pSelect; if( pSub==0 ) continue; /* Sometimes the code for a subquery will be generated more than @@ -111288,16 +112542,25 @@ SQLITE_PRIVATE int sqlite3Select( */ pParse->nHeight += sqlite3SelectExprHeight(p); - isAggSub = (pSub->selFlags & SF_Aggregate)!=0; - if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){ - /* This subquery can be absorbed into its parent. */ - if( isAggSub ){ - isAgg = 1; - p->selFlags |= SF_Aggregate; + /* Make copies of constant WHERE-clause terms in the outer query down + ** inside the subquery. This can help the subquery to run more efficiently. + */ + if( (pItem->jointype & JT_OUTER)==0 + && pushDownWhereTerms(db, pSub, p->pWhere, pItem->iCursor) + ){ +#if SELECTTRACE_ENABLED + if( sqlite3SelectTrace & 0x100 ){ + SELECTTRACE(0x100,pParse,p,("After WHERE-clause push-down:\n")); + sqlite3TreeViewSelect(0, p, 0); } - i = -1; - }else if( pTabList->nSrc==1 - && OptimizationEnabled(db, SQLITE_SubqCoroutine) +#endif + } + + /* Generate code to implement the subquery + */ + if( pTabList->nSrc==1 + && (p->selFlags & SF_All)==0 + && OptimizationEnabled(db, SQLITE_SubqCoroutine) ){ /* Implement a co-routine that will return a single row of the result ** set on each invocation. @@ -111348,33 +112611,23 @@ SQLITE_PRIVATE int sqlite3Select( sqlite3VdbeChangeP1(v, topAddr, retAddr); sqlite3ClearTempRegCache(pParse); } - if( /*pParse->nErr ||*/ db->mallocFailed ){ - goto select_end; - } + if( db->mallocFailed ) goto select_end; pParse->nHeight -= sqlite3SelectExprHeight(p); - pTabList = p->pSrc; - if( !IgnorableOrderby(pDest) ){ - sSort.pOrderBy = p->pOrderBy; - } } - pEList = p->pEList; #endif + + /* Various elements of the SELECT copied into local variables for + ** convenience */ + pEList = p->pEList; pWhere = p->pWhere; pGroupBy = p->pGroupBy; pHaving = p->pHaving; sDistinct.isTnct = (p->selFlags & SF_Distinct)!=0; -#ifndef SQLITE_OMIT_COMPOUND_SELECT - /* If there is are a sequence of queries, do the earlier ones first. - */ - if( p->pPrior ){ - rc = multiSelect(pParse, p, pDest); - explainSetInteger(pParse->iSelectId, iRestoreSelectId); #if SELECTTRACE_ENABLED - SELECTTRACE(1,pParse,p,("end compound-select processing\n")); - pParse->nSelectIndent--; -#endif - return rc; + if( sqlite3SelectTrace & 0x400 ){ + SELECTTRACE(0x400,pParse,p,("After all FROM-clause analysis:\n")); + sqlite3TreeViewSelect(0, p, 0); } #endif @@ -111394,23 +112647,23 @@ SQLITE_PRIVATE int sqlite3Select( ** BY and DISTINCT, and an index or separate temp-table for the other. */ if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct - && sqlite3ExprListCompare(sSort.pOrderBy, p->pEList, -1)==0 + && sqlite3ExprListCompare(sSort.pOrderBy, pEList, -1)==0 ){ p->selFlags &= ~SF_Distinct; - p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0); - pGroupBy = p->pGroupBy; + pGroupBy = p->pGroupBy = sqlite3ExprListDup(db, pEList, 0); /* Notice that even thought SF_Distinct has been cleared from p->selFlags, ** the sDistinct.isTnct is still set. Hence, isTnct represents the ** original setting of the SF_Distinct flag, not the current setting */ assert( sDistinct.isTnct ); } - /* If there is an ORDER BY clause, then this sorting - ** index might end up being unused if the data can be - ** extracted in pre-sorted order. If that is the case, then the - ** OP_OpenEphemeral instruction will be changed to an OP_Noop once - ** we figure out that the sorting index is not needed. The addrSortIndex - ** variable is used to facilitate that change. + /* If there is an ORDER BY clause, then create an ephemeral index to + ** do the sorting. But this sorting ephemeral index might end up + ** being unused if the data can be extracted in pre-sorted order. + ** If that is the case, then the OP_OpenEphemeral instruction will be + ** changed to an OP_Noop once we figure out that the sorting index is + ** not needed. The sSort.addrSortIndex variable is used to facilitate + ** that change. */ if( sSort.pOrderBy ){ KeyInfo *pKeyInfo; @@ -111441,14 +112694,14 @@ SQLITE_PRIVATE int sqlite3Select( sSort.sortFlags |= SORTFLAG_UseSorter; } - /* Open a virtual index to use for the distinct set. + /* Open an ephemeral index to use for the distinct set. */ if( p->selFlags & SF_Distinct ){ sDistinct.tabTnct = pParse->nTab++; sDistinct.addrTnct = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, - sDistinct.tabTnct, 0, 0, - (char*)keyInfoFromExprList(pParse, p->pEList,0,0), - P4_KEYINFO); + sDistinct.tabTnct, 0, 0, + (char*)keyInfoFromExprList(pParse, p->pEList,0,0), + P4_KEYINFO); sqlite3VdbeChangeP5(v, BTREE_UNORDERED); sDistinct.eTnctType = WHERE_DISTINCT_UNORDERED; }else{ @@ -111526,11 +112779,10 @@ SQLITE_PRIVATE int sqlite3Select( p->nSelectRow = 1; } - /* If there is both a GROUP BY and an ORDER BY clause and they are ** identical, then it may be possible to disable the ORDER BY clause ** on the grounds that the GROUP BY will cause elements to come out - ** in the correct order. It also may not - the GROUP BY may use a + ** in the correct order. It also may not - the GROUP BY might use a ** database index that causes rows to be grouped together as required ** but not actually sorted. Either way, record the fact that the ** ORDER BY and GROUP BY clauses are the same by setting the orderByGrp @@ -111708,7 +112960,8 @@ SQLITE_PRIVATE int sqlite3Select( addrTopOfLoop = sqlite3VdbeCurrentAddr(v); sqlite3ExprCacheClear(pParse); if( groupBySort ){ - sqlite3VdbeAddOp3(v, OP_SorterData, sAggInfo.sortingIdx, sortOut,sortPTab); + sqlite3VdbeAddOp3(v, OP_SorterData, sAggInfo.sortingIdx, + sortOut, sortPTab); } for(j=0; j<pGroupBy->nExpr; j++){ if( groupBySort ){ @@ -111780,7 +113033,8 @@ SQLITE_PRIVATE int sqlite3Select( sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); sqlite3VdbeResolveLabel(v, addrOutputRow); addrOutputRow = sqlite3VdbeCurrentAddr(v); - sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); VdbeCoverage(v); + sqlite3VdbeAddOp2(v, OP_IfPos, iUseFlag, addrOutputRow+2); + VdbeCoverage(v); VdbeComment((v, "Groupby result generator entry point")); sqlite3VdbeAddOp1(v, OP_Return, regOutputRow); finalizeAggFunctions(pParse, &sAggInfo); @@ -111944,7 +113198,8 @@ SQLITE_PRIVATE int sqlite3Select( ** and send them to the callback one by one. */ if( sSort.pOrderBy ){ - explainTempTable(pParse, sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); + explainTempTable(pParse, + sSort.nOBSat>0 ? "RIGHT PART OF ORDER BY":"ORDER BY"); generateSortTail(pParse, p, &sSort, pEList->nExpr, pDest); } @@ -111977,100 +113232,6 @@ select_end: return rc; } -#ifdef SQLITE_DEBUG -/* -** Generate a human-readable description of a the Select object. -*/ -SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 moreToFollow){ - int n = 0; - pView = sqlite3TreeViewPush(pView, moreToFollow); - sqlite3TreeViewLine(pView, "SELECT%s%s (0x%p)", - ((p->selFlags & SF_Distinct) ? " DISTINCT" : ""), - ((p->selFlags & SF_Aggregate) ? " agg_flag" : ""), p - ); - if( p->pSrc && p->pSrc->nSrc ) n++; - if( p->pWhere ) n++; - if( p->pGroupBy ) n++; - if( p->pHaving ) n++; - if( p->pOrderBy ) n++; - if( p->pLimit ) n++; - if( p->pOffset ) n++; - if( p->pPrior ) n++; - sqlite3TreeViewExprList(pView, p->pEList, (n--)>0, "result-set"); - if( p->pSrc && p->pSrc->nSrc ){ - int i; - pView = sqlite3TreeViewPush(pView, (n--)>0); - sqlite3TreeViewLine(pView, "FROM"); - for(i=0; i<p->pSrc->nSrc; i++){ - struct SrcList_item *pItem = &p->pSrc->a[i]; - StrAccum x; - char zLine[100]; - sqlite3StrAccumInit(&x, 0, zLine, sizeof(zLine), 0); - sqlite3XPrintf(&x, 0, "{%d,*}", pItem->iCursor); - if( pItem->zDatabase ){ - sqlite3XPrintf(&x, 0, " %s.%s", pItem->zDatabase, pItem->zName); - }else if( pItem->zName ){ - sqlite3XPrintf(&x, 0, " %s", pItem->zName); - } - if( pItem->pTab ){ - sqlite3XPrintf(&x, 0, " tabname=%Q", pItem->pTab->zName); - } - if( pItem->zAlias ){ - sqlite3XPrintf(&x, 0, " (AS %s)", pItem->zAlias); - } - if( pItem->jointype & JT_LEFT ){ - sqlite3XPrintf(&x, 0, " LEFT-JOIN"); - } - sqlite3StrAccumFinish(&x); - sqlite3TreeViewItem(pView, zLine, i<p->pSrc->nSrc-1); - if( pItem->pSelect ){ - sqlite3TreeViewSelect(pView, pItem->pSelect, 0); - } - sqlite3TreeViewPop(pView); - } - sqlite3TreeViewPop(pView); - } - if( p->pWhere ){ - sqlite3TreeViewItem(pView, "WHERE", (n--)>0); - sqlite3TreeViewExpr(pView, p->pWhere, 0); - sqlite3TreeViewPop(pView); - } - if( p->pGroupBy ){ - sqlite3TreeViewExprList(pView, p->pGroupBy, (n--)>0, "GROUPBY"); - } - if( p->pHaving ){ - sqlite3TreeViewItem(pView, "HAVING", (n--)>0); - sqlite3TreeViewExpr(pView, p->pHaving, 0); - sqlite3TreeViewPop(pView); - } - if( p->pOrderBy ){ - sqlite3TreeViewExprList(pView, p->pOrderBy, (n--)>0, "ORDERBY"); - } - if( p->pLimit ){ - sqlite3TreeViewItem(pView, "LIMIT", (n--)>0); - sqlite3TreeViewExpr(pView, p->pLimit, 0); - sqlite3TreeViewPop(pView); - } - if( p->pOffset ){ - sqlite3TreeViewItem(pView, "OFFSET", (n--)>0); - sqlite3TreeViewExpr(pView, p->pOffset, 0); - sqlite3TreeViewPop(pView); - } - if( p->pPrior ){ - const char *zOp = "UNION"; - switch( p->op ){ - case TK_ALL: zOp = "UNION ALL"; break; - case TK_INTERSECT: zOp = "INTERSECT"; break; - case TK_EXCEPT: zOp = "EXCEPT"; break; - } - sqlite3TreeViewItem(pView, zOp, (n--)>0); - sqlite3TreeViewSelect(pView, p->pPrior, 0); - sqlite3TreeViewPop(pView); - } - sqlite3TreeViewPop(pView); -} -#endif /* SQLITE_DEBUG */ - /************** End of select.c **********************************************/ /************** Begin file table.c *******************************************/ /* @@ -112091,6 +113252,7 @@ SQLITE_PRIVATE void sqlite3TreeViewSelect(TreeView *pView, const Select *p, u8 m ** These routines are in a separate files so that they will not be linked ** if they are not used. */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* #include <string.h> */ @@ -112287,6 +113449,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_free_table( ************************************************************************* ** This file contains the implementation for TRIGGERs */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_TRIGGER /* @@ -113410,6 +114573,7 @@ SQLITE_PRIVATE u32 sqlite3TriggerColmask( ** This file contains C code routines that are called by the parser ** to handle UPDATE statements. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_VIRTUALTABLE /* Forward declaration */ @@ -114140,12 +115304,10 @@ static void updateVirtualTable( */ assert( v ); ephemTab = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenEphemeral, ephemTab, pTab->nCol+1+(pRowid!=0)); - sqlite3VdbeChangeP5(v, BTREE_UNORDERED); /* fill the ephemeral table */ - sqlite3SelectDestInit(&dest, SRT_Table, ephemTab); + sqlite3SelectDestInit(&dest, SRT_EphemTab, ephemTab); sqlite3Select(pParse, pSelect, &dest); /* Generate code to scan the ephemeral table and call VUpdate. */ @@ -114188,6 +115350,8 @@ static void updateVirtualTable( ** Most of the code in this file may be omitted by defining the ** SQLITE_OMIT_VACUUM macro. */ +/* #include "sqliteInt.h" */ +/* #include "vdbeInt.h" */ #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* @@ -114560,6 +115724,7 @@ end_of_vacuum: ** This file contains code used to help implement virtual tables. */ #ifndef SQLITE_OMIT_VIRTUALTABLE +/* #include "sqliteInt.h" */ /* ** Before a virtual table xCreate() or xConnect() method is invoked, the @@ -115384,8 +116549,10 @@ SQLITE_PRIVATE int sqlite3VtabCallDestroy(sqlite3 *db, int iDb, const char *zTab static void callFinaliser(sqlite3 *db, int offset){ int i; if( db->aVTrans ){ + VTable **aVTrans = db->aVTrans; + db->aVTrans = 0; for(i=0; i<db->nVTrans; i++){ - VTable *pVTab = db->aVTrans[i]; + VTable *pVTab = aVTrans[i]; sqlite3_vtab *p = pVTab->pVtab; if( p ){ int (*x)(sqlite3_vtab *); @@ -115395,9 +116562,8 @@ static void callFinaliser(sqlite3 *db, int offset){ pVTab->iSavepoint = 0; sqlite3VtabUnlock(pVTab); } - sqlite3DbFree(db, db->aVTrans); + sqlite3DbFree(db, aVTrans); db->nVTrans = 0; - db->aVTrans = 0; } } @@ -115697,9 +116863,9 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ #endif /* SQLITE_OMIT_VIRTUALTABLE */ /************** End of vtab.c ************************************************/ -/************** Begin file where.c *******************************************/ +/************** Begin file wherecode.c ***************************************/ /* -** 2001 September 15 +** 2015-06-06 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -115710,13 +116876,15 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ ** ************************************************************************* ** This module contains C code that generates VDBE code used to process -** the WHERE clause of SQL statements. This module is responsible for -** generating the code that loops through a table looking for applicable -** rows. Indices are selected and used to speed the search when doing -** so is applicable. Because this module is responsible for selecting -** indices, you might also think of this module as the "query optimizer". +** the WHERE clause of SQL statements. +** +** This file was split off from where.c on 2015-06-06 in order to reduce the +** size of where.c and make it easier to edit. This file contains the routines +** that actually generate the bulk of the WHERE loop code. The original where.c +** file retains the code that does query planning and analysis. */ -/************** Include whereInt.h in the middle of where.c ******************/ +/* #include "sqliteInt.h" */ +/************** Include whereInt.h in the middle of wherecode.c **************/ /************** Begin file whereInt.h ****************************************/ /* ** 2013-11-12 @@ -115739,7 +116907,7 @@ SQLITE_API int SQLITE_CDECL sqlite3_vtab_config(sqlite3 *db, int op, ...){ ** Trace output macros */ #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) -/***/ int sqlite3WhereTrace = 0; +/***/ int sqlite3WhereTrace; #endif #if defined(SQLITE_DEBUG) \ && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_WHERETRACE)) @@ -115881,10 +117049,6 @@ struct WhereOrSet { WhereOrCost a[N_OR_COST]; /* Set of best costs */ }; - -/* Forward declaration of methods */ -static int whereLoopResize(sqlite3*, WhereLoop*, int); - /* ** Each instance of this object holds a sequence of WhereLoop objects ** that implement some or all of a query plan. @@ -116000,6 +117164,7 @@ struct WhereTerm { #define TERM_LIKEOPT 0x100 /* Virtual terms from the LIKE optimization */ #define TERM_LIKECOND 0x200 /* Conditionally this LIKE operator term */ #define TERM_LIKE 0x400 /* The original LIKE operator */ +#define TERM_IS 0x800 /* Term.pExpr is an IS operator */ /* ** An instance of the WhereScan object is used as an iterator for locating @@ -116091,6 +117256,11 @@ struct WhereMaskSet { int ix[BMS]; /* Cursor assigned to each bit */ }; +/* +** Initialize a WhereMaskSet object +*/ +#define initMaskSet(P) (P)->n=0 + /* ** This object is a convenience wrapper holding all information needed ** to construct WhereLoop objects for a particular query. @@ -116142,27 +117312,84 @@ struct WhereInfo { WhereLevel a[1]; /* Information about each nest loop in WHERE */ }; +/* +** Private interfaces - callable only by other where.c routines. +** +** where.c: +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet*,int); +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ +); + +/* wherecode.c: */ +#ifndef SQLITE_OMIT_EXPLAIN +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + int iLevel, /* Value for "level" column of output */ + int iFrom, /* Value for "from" column of output */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +); +#else +# define sqlite3WhereExplainOneScan(u,v,w,x,y,z) 0 +#endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +); +#else +# define sqlite3WhereAddScanStatus(a, b, c, d) ((void)d) +#endif +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + Bitmask notReady /* Which tables are currently available */ +); + +/* whereexpr.c: */ +SQLITE_PRIVATE void sqlite3WhereClauseInit(WhereClause*,WhereInfo*); +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause*); +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause*,Expr*,u8); +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet*, Expr*); +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet*, ExprList*); +SQLITE_PRIVATE void sqlite3WhereExprAnalyze(SrcList*, WhereClause*); + + + + + /* ** Bitmasks for the operators on WhereTerm objects. These are all ** operators that are of interest to the query planner. An ** OR-ed combination of these values can be used when searching for ** particular WhereTerms within a WhereClause. */ -#define WO_IN 0x001 -#define WO_EQ 0x002 +#define WO_IN 0x0001 +#define WO_EQ 0x0002 #define WO_LT (WO_EQ<<(TK_LT-TK_EQ)) #define WO_LE (WO_EQ<<(TK_LE-TK_EQ)) #define WO_GT (WO_EQ<<(TK_GT-TK_EQ)) #define WO_GE (WO_EQ<<(TK_GE-TK_EQ)) -#define WO_MATCH 0x040 -#define WO_ISNULL 0x080 -#define WO_OR 0x100 /* Two or more OR-connected terms */ -#define WO_AND 0x200 /* Two or more AND-connected terms */ -#define WO_EQUIV 0x400 /* Of the form A==B, both columns */ -#define WO_NOOP 0x800 /* This term does not restrict search space */ +#define WO_MATCH 0x0040 +#define WO_IS 0x0080 +#define WO_ISNULL 0x0100 +#define WO_OR 0x0200 /* Two or more OR-connected terms */ +#define WO_AND 0x0400 /* Two or more AND-connected terms */ +#define WO_EQUIV 0x0800 /* Of the form A==B, both columns */ +#define WO_NOOP 0x1000 /* This term does not restrict search space */ -#define WO_ALL 0xfff /* Mask of all possible WO_* values */ -#define WO_SINGLE 0x0ff /* Mask of all non-compound WO_* values */ +#define WO_ALL 0x1fff /* Mask of all possible WO_* values */ +#define WO_SINGLE 0x01ff /* Mask of all non-compound WO_* values */ /* ** These are definitions of bits in the WhereLoop.wsFlags field. @@ -116190,624 +117417,1683 @@ struct WhereInfo { #define WHERE_PARTIALIDX 0x00020000 /* The automatic index is partial */ /************** End of whereInt.h ********************************************/ -/************** Continuing where we left off in where.c **********************/ - -/* -** Return the estimated number of output rows from a WHERE clause -*/ -SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ - return sqlite3LogEstToInt(pWInfo->nRowOut); -} - -/* -** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this -** WHERE clause returns outputs for DISTINCT processing. -*/ -SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ - return pWInfo->eDistinct; -} - -/* -** Return TRUE if the WHERE clause returns rows in ORDER BY order. -** Return FALSE if the output needs to be sorted. -*/ -SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ - return pWInfo->nOBSat; -} - -/* -** Return the VDBE address or label to jump to in order to continue -** immediately with the next row of a WHERE clause. -*/ -SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){ - assert( pWInfo->iContinue!=0 ); - return pWInfo->iContinue; -} +/************** Continuing where we left off in wherecode.c ******************/ +#ifndef SQLITE_OMIT_EXPLAIN /* -** Return the VDBE address or label to jump to in order to break -** out of a WHERE loop. +** This routine is a helper for explainIndexRange() below +** +** pStr holds the text of an expression that we are building up one term +** at a time. This routine adds a new term to the end of the expression. +** Terms are separated by AND so add the "AND" text for second and subsequent +** terms only. */ -SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ - return pWInfo->iBreak; +static void explainAppendTerm( + StrAccum *pStr, /* The text expression being built */ + int iTerm, /* Index of this term. First is zero */ + const char *zColumn, /* Name of the column */ + const char *zOp /* Name of the operator */ +){ + if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5); + sqlite3StrAccumAppendAll(pStr, zColumn); + sqlite3StrAccumAppend(pStr, zOp, 1); + sqlite3StrAccumAppend(pStr, "?", 1); } /* -** Return TRUE if an UPDATE or DELETE statement can operate directly on -** the rowids returned by a WHERE clause. Return FALSE if doing an -** UPDATE or DELETE might change subsequent WHERE clause results. +** Argument pLevel describes a strategy for scanning table pTab. This +** function appends text to pStr that describes the subset of table +** rows scanned by the strategy in the form of an SQL expression. ** -** If the ONEPASS optimization is used (if this routine returns true) -** then also write the indices of open cursors used by ONEPASS -** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data -** table and iaCur[1] gets the cursor used by an auxiliary index. -** Either value may be -1, indicating that cursor is not used. -** Any cursors returned will have been opened for writing. +** For example, if the query: ** -** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is -** unable to use the ONEPASS optimization. -*/ -SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ - memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); - return pWInfo->okOnePass; -} - -/* -** Move the content of pSrc into pDest -*/ -static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ - pDest->n = pSrc->n; - memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0])); -} - -/* -** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet. +** SELECT * FROM t1 WHERE a=1 AND b>2; ** -** The new entry might overwrite an existing entry, or it might be -** appended, or it might be discarded. Do whatever is the right thing -** so that pSet keeps the N_OR_COST best entries seen so far. +** is run and there is an index on (a, b), then this function returns a +** string similar to: +** +** "a=? AND b>?" */ -static int whereOrInsert( - WhereOrSet *pSet, /* The WhereOrSet to be updated */ - Bitmask prereq, /* Prerequisites of the new entry */ - LogEst rRun, /* Run-cost of the new entry */ - LogEst nOut /* Number of outputs for the new entry */ -){ - u16 i; - WhereOrCost *p; - for(i=pSet->n, p=pSet->a; i>0; i--, p++){ - if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ - goto whereOrInsert_done; - } - if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){ - return 0; +static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ + Index *pIndex = pLoop->u.btree.pIndex; + u16 nEq = pLoop->u.btree.nEq; + u16 nSkip = pLoop->nSkip; + int i, j; + Column *aCol = pTab->aCol; + i16 *aiColumn = pIndex->aiColumn; + + if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; + sqlite3StrAccumAppend(pStr, " (", 2); + for(i=0; i<nEq; i++){ + char *z = aiColumn[i] < 0 ? "rowid" : aCol[aiColumn[i]].zName; + if( i>=nSkip ){ + explainAppendTerm(pStr, i, z, "="); + }else{ + if( i ) sqlite3StrAccumAppend(pStr, " AND ", 5); + sqlite3XPrintf(pStr, 0, "ANY(%s)", z); } } - if( pSet->n<N_OR_COST ){ - p = &pSet->a[pSet->n++]; - p->nOut = nOut; - }else{ - p = pSet->a; - for(i=1; i<pSet->n; i++){ - if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i; - } - if( p->rRun<=rRun ) return 0; + + j = i; + if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ + char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + explainAppendTerm(pStr, i++, z, ">"); } -whereOrInsert_done: - p->prereq = prereq; - p->rRun = rRun; - if( p->nOut>nOut ) p->nOut = nOut; - return 1; + if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ + char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; + explainAppendTerm(pStr, i, z, "<"); + } + sqlite3StrAccumAppend(pStr, ")", 1); } /* -** Initialize a preallocated WhereClause structure. +** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN +** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was +** defined at compile-time. If it is not a no-op, a single OP_Explain opcode +** is added to the output to describe the table scan strategy in pLevel. +** +** If an OP_Explain opcode is added to the VM, its address is returned. +** Otherwise, if no OP_Explain is coded, zero is returned. */ -static void whereClauseInit( - WhereClause *pWC, /* The WhereClause to be initialized */ - WhereInfo *pWInfo /* The WHERE processing context */ +SQLITE_PRIVATE int sqlite3WhereExplainOneScan( + Parse *pParse, /* Parse context */ + SrcList *pTabList, /* Table list this loop refers to */ + WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ + int iLevel, /* Value for "level" column of output */ + int iFrom, /* Value for "from" column of output */ + u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ ){ - pWC->pWInfo = pWInfo; - pWC->pOuter = 0; - pWC->nTerm = 0; - pWC->nSlot = ArraySize(pWC->aStatic); - pWC->a = pWC->aStatic; -} - -/* Forward reference */ -static void whereClauseClear(WhereClause*); + int ret = 0; +#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) + if( pParse->explain==2 ) +#endif + { + struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom]; + Vdbe *v = pParse->pVdbe; /* VM being constructed */ + sqlite3 *db = pParse->db; /* Database handle */ + int iId = pParse->iSelectId; /* Select id (left-most output column) */ + int isSearch; /* True for a SEARCH. False for SCAN. */ + WhereLoop *pLoop; /* The controlling WhereLoop object */ + u32 flags; /* Flags that describe this loop */ + char *zMsg; /* Text to add to EQP output */ + StrAccum str; /* EQP output string */ + char zBuf[100]; /* Initial space for EQP output string */ -/* -** Deallocate all memory associated with a WhereOrInfo object. -*/ -static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ - whereClauseClear(&p->wc); - sqlite3DbFree(db, p); -} + pLoop = pLevel->pWLoop; + flags = pLoop->wsFlags; + if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0; -/* -** Deallocate all memory associated with a WhereAndInfo object. -*/ -static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ - whereClauseClear(&p->wc); - sqlite3DbFree(db, p); -} + isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 + || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) + || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX)); -/* -** Deallocate a WhereClause structure. The WhereClause structure -** itself is not freed. This routine is the inverse of whereClauseInit(). -*/ -static void whereClauseClear(WhereClause *pWC){ - int i; - WhereTerm *a; - sqlite3 *db = pWC->pWInfo->pParse->db; - for(i=pWC->nTerm-1, a=pWC->a; i>=0; i--, a++){ - if( a->wtFlags & TERM_DYNAMIC ){ - sqlite3ExprDelete(db, a->pExpr); + sqlite3StrAccumInit(&str, db, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); + sqlite3StrAccumAppendAll(&str, isSearch ? "SEARCH" : "SCAN"); + if( pItem->pSelect ){ + sqlite3XPrintf(&str, 0, " SUBQUERY %d", pItem->iSelectId); + }else{ + sqlite3XPrintf(&str, 0, " TABLE %s", pItem->zName); } - if( a->wtFlags & TERM_ORINFO ){ - whereOrInfoDelete(db, a->u.pOrInfo); - }else if( a->wtFlags & TERM_ANDINFO ){ - whereAndInfoDelete(db, a->u.pAndInfo); + + if( pItem->zAlias ){ + sqlite3XPrintf(&str, 0, " AS %s", pItem->zAlias); } - } - if( pWC->a!=pWC->aStatic ){ - sqlite3DbFree(db, pWC->a); - } -} + if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){ + const char *zFmt = 0; + Index *pIdx; -/* -** Add a single new WhereTerm entry to the WhereClause object pWC. -** The new WhereTerm object is constructed from Expr p and with wtFlags. -** The index in pWC->a[] of the new WhereTerm is returned on success. -** 0 is returned if the new WhereTerm could not be added due to a memory -** allocation error. The memory allocation failure will be recorded in -** the db->mallocFailed flag so that higher-level functions can detect it. -** -** This routine will increase the size of the pWC->a[] array as necessary. -** -** If the wtFlags argument includes TERM_DYNAMIC, then responsibility -** for freeing the expression p is assumed by the WhereClause object pWC. -** This is true even if this routine fails to allocate a new WhereTerm. -** -** WARNING: This routine might reallocate the space used to store -** WhereTerms. All pointers to WhereTerms should be invalidated after -** calling this routine. Such pointers may be reinitialized by referencing -** the pWC->a[] array. -*/ -static int whereClauseInsert(WhereClause *pWC, Expr *p, u16 wtFlags){ - WhereTerm *pTerm; - int idx; - testcase( wtFlags & TERM_VIRTUAL ); - if( pWC->nTerm>=pWC->nSlot ){ - WhereTerm *pOld = pWC->a; - sqlite3 *db = pWC->pWInfo->pParse->db; - pWC->a = sqlite3DbMallocRaw(db, sizeof(pWC->a[0])*pWC->nSlot*2 ); - if( pWC->a==0 ){ - if( wtFlags & TERM_DYNAMIC ){ - sqlite3ExprDelete(db, p); + assert( pLoop->u.btree.pIndex!=0 ); + pIdx = pLoop->u.btree.pIndex; + assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) ); + if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){ + if( isSearch ){ + zFmt = "PRIMARY KEY"; + } + }else if( flags & WHERE_PARTIALIDX ){ + zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; + }else if( flags & WHERE_AUTO_INDEX ){ + zFmt = "AUTOMATIC COVERING INDEX"; + }else if( flags & WHERE_IDX_ONLY ){ + zFmt = "COVERING INDEX %s"; + }else{ + zFmt = "INDEX %s"; } - pWC->a = pOld; - return 0; + if( zFmt ){ + sqlite3StrAccumAppend(&str, " USING ", 7); + sqlite3XPrintf(&str, 0, zFmt, pIdx->zName); + explainIndexRange(&str, pLoop, pItem->pTab); + } + }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){ + const char *zRange; + if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){ + zRange = "(rowid=?)"; + }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){ + zRange = "(rowid>? AND rowid<?)"; + }else if( flags&WHERE_BTM_LIMIT ){ + zRange = "(rowid>?)"; + }else{ + assert( flags&WHERE_TOP_LIMIT); + zRange = "(rowid<?)"; + } + sqlite3StrAccumAppendAll(&str, " USING INTEGER PRIMARY KEY "); + sqlite3StrAccumAppendAll(&str, zRange); } - memcpy(pWC->a, pOld, sizeof(pWC->a[0])*pWC->nTerm); - if( pOld!=pWC->aStatic ){ - sqlite3DbFree(db, pOld); +#ifndef SQLITE_OMIT_VIRTUALTABLE + else if( (flags & WHERE_VIRTUALTABLE)!=0 ){ + sqlite3XPrintf(&str, 0, " VIRTUAL TABLE INDEX %d:%s", + pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr); } - pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); - memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm)); - } - pTerm = &pWC->a[idx = pWC->nTerm++]; - if( p && ExprHasProperty(p, EP_Unlikely) ){ - pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; - }else{ - pTerm->truthProb = 1; +#endif +#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS + if( pLoop->nOut>=10 ){ + sqlite3XPrintf(&str, 0, " (~%llu rows)", sqlite3LogEstToInt(pLoop->nOut)); + }else{ + sqlite3StrAccumAppend(&str, " (~1 row)", 9); + } +#endif + zMsg = sqlite3StrAccumFinish(&str); + ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC); } - pTerm->pExpr = sqlite3ExprSkipCollate(p); - pTerm->wtFlags = wtFlags; - pTerm->pWC = pWC; - pTerm->iParent = -1; - return idx; + return ret; } +#endif /* SQLITE_OMIT_EXPLAIN */ +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS /* -** This routine identifies subexpressions in the WHERE clause where -** each subexpression is separated by the AND operator or some other -** operator specified in the op parameter. The WhereClause structure -** is filled with pointers to subexpressions. For example: -** -** WHERE a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22) -** \________/ \_______________/ \________________/ -** slot[0] slot[1] slot[2] -** -** The original WHERE clause in pExpr is unaltered. All this routine -** does is make slot[] entries point to substructure within pExpr. +** Configure the VM passed as the first argument with an +** sqlite3_stmt_scanstatus() entry corresponding to the scan used to +** implement level pLvl. Argument pSrclist is a pointer to the FROM +** clause that the scan reads data from. ** -** In the previous sentence and in the diagram, "slot[]" refers to -** the WhereClause.a[] array. The slot[] array grows as needed to contain -** all terms of the WHERE clause. +** If argument addrExplain is not 0, it must be the address of an +** OP_Explain instruction that describes the same loop. */ -static void whereSplit(WhereClause *pWC, Expr *pExpr, u8 op){ - Expr *pE2 = sqlite3ExprSkipCollate(pExpr); - pWC->op = op; - if( pE2==0 ) return; - if( pE2->op!=op ){ - whereClauseInsert(pWC, pExpr, 0); +SQLITE_PRIVATE void sqlite3WhereAddScanStatus( + Vdbe *v, /* Vdbe to add scanstatus entry to */ + SrcList *pSrclist, /* FROM clause pLvl reads data from */ + WhereLevel *pLvl, /* Level to add scanstatus() entry for */ + int addrExplain /* Address of OP_Explain (or 0) */ +){ + const char *zObj = 0; + WhereLoop *pLoop = pLvl->pWLoop; + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ + zObj = pLoop->u.btree.pIndex->zName; }else{ - whereSplit(pWC, pE2->pLeft, op); - whereSplit(pWC, pE2->pRight, op); + zObj = pSrclist->a[pLvl->iFrom].zName; } + sqlite3VdbeScanStatus( + v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj + ); } +#endif -/* -** Initialize a WhereMaskSet object -*/ -#define initMaskSet(P) (P)->n=0 /* -** Return the bitmask for the given cursor number. Return 0 if -** iCursor is not in the set. +** Disable a term in the WHERE clause. Except, do not disable the term +** if it controls a LEFT OUTER JOIN and it did not originate in the ON +** or USING clause of that join. +** +** Consider the term t2.z='ok' in the following queries: +** +** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok' +** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok' +** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok' +** +** The t2.z='ok' is disabled in the in (2) because it originates +** in the ON clause. The term is disabled in (3) because it is not part +** of a LEFT OUTER JOIN. In (1), the term is not disabled. +** +** Disabling a term causes that term to not be tested in the inner loop +** of the join. Disabling is an optimization. When terms are satisfied +** by indices, we disable them to prevent redundant tests in the inner +** loop. We would get the correct results if nothing were ever disabled, +** but joins might run a little slower. The trick is to disable as much +** as we can without disabling too much. If we disabled in (1), we'd get +** the wrong answer. See ticket #813. +** +** If all the children of a term are disabled, then that term is also +** automatically disabled. In this way, terms get disabled if derived +** virtual terms are tested first. For example: +** +** x GLOB 'abc*' AND x>='abc' AND x<'acd' +** \___________/ \______/ \_____/ +** parent child1 child2 +** +** Only the parent term was in the original WHERE clause. The child1 +** and child2 terms were added by the LIKE optimization. If both of +** the virtual child terms are valid, then testing of the parent can be +** skipped. +** +** Usually the parent term is marked as TERM_CODED. But if the parent +** term was originally TERM_LIKE, then the parent gets TERM_LIKECOND instead. +** The TERM_LIKECOND marking indicates that the term should be coded inside +** a conditional such that is only evaluated on the second pass of a +** LIKE-optimization loop, when scanning BLOBs instead of strings. */ -static Bitmask getMask(WhereMaskSet *pMaskSet, int iCursor){ - int i; - assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 ); - for(i=0; i<pMaskSet->n; i++){ - if( pMaskSet->ix[i]==iCursor ){ - return MASKBIT(i); +static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ + int nLoop = 0; + while( pTerm + && (pTerm->wtFlags & TERM_CODED)==0 + && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + && (pLevel->notReady & pTerm->prereqAll)==0 + ){ + if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ + pTerm->wtFlags |= TERM_LIKECOND; + }else{ + pTerm->wtFlags |= TERM_CODED; } + if( pTerm->iParent<0 ) break; + pTerm = &pTerm->pWC->a[pTerm->iParent]; + pTerm->nChild--; + if( pTerm->nChild!=0 ) break; + nLoop++; } - return 0; } /* -** Create a new mask for cursor iCursor. +** Code an OP_Affinity opcode to apply the column affinity string zAff +** to the n registers starting at base. ** -** There is one cursor per table in the FROM clause. The number of -** tables in the FROM clause is limited by a test early in the -** sqlite3WhereBegin() routine. So we know that the pMaskSet->ix[] -** array will never overflow. -*/ -static void createMask(WhereMaskSet *pMaskSet, int iCursor){ - assert( pMaskSet->n < ArraySize(pMaskSet->ix) ); - pMaskSet->ix[pMaskSet->n++] = iCursor; -} - -/* -** These routines walk (recursively) an expression tree and generate -** a bitmask indicating which tables are used in that expression -** tree. +** As an optimization, SQLITE_AFF_BLOB entries (which are no-ops) at the +** beginning and end of zAff are ignored. If all entries in zAff are +** SQLITE_AFF_BLOB, then no code gets generated. +** +** This routine makes its own copy of zAff so that the caller is free +** to modify zAff after this routine returns. */ -static Bitmask exprListTableUsage(WhereMaskSet*, ExprList*); -static Bitmask exprSelectTableUsage(WhereMaskSet*, Select*); -static Bitmask exprTableUsage(WhereMaskSet *pMaskSet, Expr *p){ - Bitmask mask = 0; - if( p==0 ) return 0; - if( p->op==TK_COLUMN ){ - mask = getMask(pMaskSet, p->iTable); - return mask; +static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){ + Vdbe *v = pParse->pVdbe; + if( zAff==0 ){ + assert( pParse->db->mallocFailed ); + return; } - mask = exprTableUsage(pMaskSet, p->pRight); - mask |= exprTableUsage(pMaskSet, p->pLeft); - if( ExprHasProperty(p, EP_xIsSelect) ){ - mask |= exprSelectTableUsage(pMaskSet, p->x.pSelect); - }else{ - mask |= exprListTableUsage(pMaskSet, p->x.pList); + assert( v!=0 ); + + /* Adjust base and n to skip over SQLITE_AFF_BLOB entries at the beginning + ** and end of the affinity string. + */ + while( n>0 && zAff[0]==SQLITE_AFF_BLOB ){ + n--; + base++; + zAff++; } - return mask; -} -static Bitmask exprListTableUsage(WhereMaskSet *pMaskSet, ExprList *pList){ - int i; - Bitmask mask = 0; - if( pList ){ - for(i=0; i<pList->nExpr; i++){ - mask |= exprTableUsage(pMaskSet, pList->a[i].pExpr); - } + while( n>1 && zAff[n-1]==SQLITE_AFF_BLOB ){ + n--; } - return mask; -} -static Bitmask exprSelectTableUsage(WhereMaskSet *pMaskSet, Select *pS){ - Bitmask mask = 0; - while( pS ){ - SrcList *pSrc = pS->pSrc; - mask |= exprListTableUsage(pMaskSet, pS->pEList); - mask |= exprListTableUsage(pMaskSet, pS->pGroupBy); - mask |= exprListTableUsage(pMaskSet, pS->pOrderBy); - mask |= exprTableUsage(pMaskSet, pS->pWhere); - mask |= exprTableUsage(pMaskSet, pS->pHaving); - if( ALWAYS(pSrc!=0) ){ - int i; - for(i=0; i<pSrc->nSrc; i++){ - mask |= exprSelectTableUsage(pMaskSet, pSrc->a[i].pSelect); - mask |= exprTableUsage(pMaskSet, pSrc->a[i].pOn); - } - } - pS = pS->pPrior; + + /* Code the OP_Affinity opcode if there is anything left to do. */ + if( n>0 ){ + sqlite3VdbeAddOp2(v, OP_Affinity, base, n); + sqlite3VdbeChangeP4(v, -1, zAff, n); + sqlite3ExprCacheAffinityChange(pParse, base, n); } - return mask; } -/* -** Return TRUE if the given operator is one of the operators that is -** allowed for an indexable WHERE clause term. The allowed operators are -** "=", "<", ">", "<=", ">=", "IN", and "IS NULL" -*/ -static int allowedOp(int op){ - assert( TK_GT>TK_EQ && TK_GT<TK_GE ); - assert( TK_LT>TK_EQ && TK_LT<TK_GE ); - assert( TK_LE>TK_EQ && TK_LE<TK_GE ); - assert( TK_GE==TK_EQ+4 ); - return op==TK_IN || (op>=TK_EQ && op<=TK_GE) || op==TK_ISNULL; -} /* -** Commute a comparison operator. Expressions of the form "X op Y" -** are converted into "Y op X". +** Generate code for a single equality term of the WHERE clause. An equality +** term can be either X=expr or X IN (...). pTerm is the term to be +** coded. ** -** If left/right precedence rules come into play when determining the -** collating sequence, then COLLATE operators are adjusted to ensure -** that the collating sequence does not change. For example: -** "Y collate NOCASE op X" becomes "X op Y" because any collation sequence on -** the left hand side of a comparison overrides any collation sequence -** attached to the right. For the same reason the EP_Collate flag -** is not commuted. +** The current value for the constraint is left in register iReg. +** +** For a constraint of the form X=expr, the expression is evaluated and its +** result is left on the stack. For constraints of the form X IN (...) +** this routine sets up a loop that will iterate over all values of X. */ -static void exprCommute(Parse *pParse, Expr *pExpr){ - u16 expRight = (pExpr->pRight->flags & EP_Collate); - u16 expLeft = (pExpr->pLeft->flags & EP_Collate); - assert( allowedOp(pExpr->op) && pExpr->op!=TK_IN ); - if( expRight==expLeft ){ - /* Either X and Y both have COLLATE operator or neither do */ - if( expRight ){ - /* Both X and Y have COLLATE operators. Make sure X is always - ** used by clearing the EP_Collate flag from Y. */ - pExpr->pRight->flags &= ~EP_Collate; - }else if( sqlite3ExprCollSeq(pParse, pExpr->pLeft)!=0 ){ - /* Neither X nor Y have COLLATE operators, but X has a non-default - ** collating sequence. So add the EP_Collate marker on X to cause - ** it to be searched first. */ - pExpr->pLeft->flags |= EP_Collate; - } - } - SWAP(Expr*,pExpr->pRight,pExpr->pLeft); - if( pExpr->op>=TK_GT ){ - assert( TK_LT==TK_GT+2 ); - assert( TK_GE==TK_LE+2 ); - assert( TK_GT>TK_EQ ); - assert( TK_GT<TK_LE ); - assert( pExpr->op>=TK_GT && pExpr->op<=TK_GE ); - pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT; - } -} +static int codeEqualityTerm( + Parse *pParse, /* The parsing context */ + WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ + WhereLevel *pLevel, /* The level of the FROM clause we are working on */ + int iEq, /* Index of the equality term within this level */ + int bRev, /* True for reverse-order IN operations */ + int iTarget /* Attempt to leave results in this register */ +){ + Expr *pX = pTerm->pExpr; + Vdbe *v = pParse->pVdbe; + int iReg; /* Register holding results */ -/* -** Translate from TK_xx operator to WO_xx bitmask. -*/ -static u16 operatorMask(int op){ - u16 c; - assert( allowedOp(op) ); - if( op==TK_IN ){ - c = WO_IN; - }else if( op==TK_ISNULL ){ - c = WO_ISNULL; + assert( iTarget>0 ); + if( pX->op==TK_EQ || pX->op==TK_IS ){ + iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); + }else if( pX->op==TK_ISNULL ){ + iReg = iTarget; + sqlite3VdbeAddOp2(v, OP_Null, 0, iReg); +#ifndef SQLITE_OMIT_SUBQUERY }else{ - assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff ); - c = (u16)(WO_EQ<<(op-TK_EQ)); - } - assert( op!=TK_ISNULL || c==WO_ISNULL ); - assert( op!=TK_IN || c==WO_IN ); - assert( op!=TK_EQ || c==WO_EQ ); - assert( op!=TK_LT || c==WO_LT ); - assert( op!=TK_LE || c==WO_LE ); - assert( op!=TK_GT || c==WO_GT ); - assert( op!=TK_GE || c==WO_GE ); - return c; -} - -/* -** Advance to the next WhereTerm that matches according to the criteria -** established when the pScan object was initialized by whereScanInit(). -** Return NULL if there are no more matching WhereTerms. -*/ -static WhereTerm *whereScanNext(WhereScan *pScan){ - int iCur; /* The cursor on the LHS of the term */ - int iColumn; /* The column on the LHS of the term. -1 for IPK */ - Expr *pX; /* An expression being tested */ - WhereClause *pWC; /* Shorthand for pScan->pWC */ - WhereTerm *pTerm; /* The term being tested */ - int k = pScan->k; /* Where to start scanning */ + int eType; + int iTab; + struct InLoop *pIn; + WhereLoop *pLoop = pLevel->pWLoop; - while( pScan->iEquiv<=pScan->nEquiv ){ - iCur = pScan->aEquiv[pScan->iEquiv-2]; - iColumn = pScan->aEquiv[pScan->iEquiv-1]; - while( (pWC = pScan->pWC)!=0 ){ - for(pTerm=pWC->a+k; k<pWC->nTerm; k++, pTerm++){ - if( pTerm->leftCursor==iCur - && pTerm->u.leftColumn==iColumn - && (pScan->iEquiv<=2 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) - ){ - if( (pTerm->eOperator & WO_EQUIV)!=0 - && pScan->nEquiv<ArraySize(pScan->aEquiv) - ){ - int j; - pX = sqlite3ExprSkipCollate(pTerm->pExpr->pRight); - assert( pX->op==TK_COLUMN ); - for(j=0; j<pScan->nEquiv; j+=2){ - if( pScan->aEquiv[j]==pX->iTable - && pScan->aEquiv[j+1]==pX->iColumn ){ - break; - } - } - if( j==pScan->nEquiv ){ - pScan->aEquiv[j] = pX->iTable; - pScan->aEquiv[j+1] = pX->iColumn; - pScan->nEquiv += 2; - } - } - if( (pTerm->eOperator & pScan->opMask)!=0 ){ - /* Verify the affinity and collating sequence match */ - if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){ - CollSeq *pColl; - Parse *pParse = pWC->pWInfo->pParse; - pX = pTerm->pExpr; - if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){ - continue; - } - assert(pX->pLeft); - pColl = sqlite3BinaryCompareCollSeq(pParse, - pX->pLeft, pX->pRight); - if( pColl==0 ) pColl = pParse->db->pDfltColl; - if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){ - continue; - } - } - if( (pTerm->eOperator & WO_EQ)!=0 - && (pX = pTerm->pExpr->pRight)->op==TK_COLUMN - && pX->iTable==pScan->aEquiv[0] - && pX->iColumn==pScan->aEquiv[1] - ){ - continue; - } - pScan->k = k+1; - return pTerm; - } - } + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 + && pLoop->u.btree.pIndex!=0 + && pLoop->u.btree.pIndex->aSortOrder[iEq] + ){ + testcase( iEq==0 ); + testcase( bRev ); + bRev = !bRev; + } + assert( pX->op==TK_IN ); + iReg = iTarget; + eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0); + if( eType==IN_INDEX_INDEX_DESC ){ + testcase( bRev ); + bRev = !bRev; + } + iTab = pX->iTable; + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0); + VdbeCoverageIf(v, bRev); + VdbeCoverageIf(v, !bRev); + assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); + pLoop->wsFlags |= WHERE_IN_ABLE; + if( pLevel->u.in.nIn==0 ){ + pLevel->addrNxt = sqlite3VdbeMakeLabel(v); + } + pLevel->u.in.nIn++; + pLevel->u.in.aInLoop = + sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop, + sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn); + pIn = pLevel->u.in.aInLoop; + if( pIn ){ + pIn += pLevel->u.in.nIn - 1; + pIn->iCur = iTab; + if( eType==IN_INDEX_ROWID ){ + pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg); + }else{ + pIn->addrInTop = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg); } - pScan->pWC = pScan->pWC->pOuter; - k = 0; + pIn->eEndLoopOp = bRev ? OP_PrevIfOpen : OP_NextIfOpen; + sqlite3VdbeAddOp1(v, OP_IsNull, iReg); VdbeCoverage(v); + }else{ + pLevel->u.in.nIn = 0; } - pScan->pWC = pScan->pOrigWC; - k = 0; - pScan->iEquiv += 2; +#endif } - return 0; + disableTerm(pLevel, pTerm); + return iReg; } /* -** Initialize a WHERE clause scanner object. Return a pointer to the -** first match. Return NULL if there are no matches. +** Generate code that will evaluate all == and IN constraints for an +** index scan. ** -** The scanner will be searching the WHERE clause pWC. It will look -** for terms of the form "X <op> <expr>" where X is column iColumn of table -** iCur. The <op> must be one of the operators described by opMask. +** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c). +** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10 +** The index has as many as three equality constraints, but in this +** example, the third "c" value is an inequality. So only two +** constraints are coded. This routine will generate code to evaluate +** a==5 and b IN (1,2,3). The current values for a and b will be stored +** in consecutive registers and the index of the first register is returned. ** -** If the search is for X and the WHERE clause contains terms of the -** form X=Y then this routine might also return terms of the form -** "Y <op> <expr>". The number of levels of transitivity is limited, -** but is enough to handle most commonly occurring SQL statements. +** In the example above nEq==2. But this subroutine works for any value +** of nEq including 0. If nEq==0, this routine is nearly a no-op. +** The only thing it does is allocate the pLevel->iMem memory cell and +** compute the affinity string. ** -** If X is not the INTEGER PRIMARY KEY then X must be compatible with -** index pIdx. +** The nExtraReg parameter is 0 or 1. It is 0 if all WHERE clause constraints +** are == or IN and are covered by the nEq. nExtraReg is 1 if there is +** an inequality constraint (such as the "c>=5 AND c<10" in the example) that +** occurs after the nEq quality constraints. +** +** This routine allocates a range of nEq+nExtraReg memory cells and returns +** the index of the first memory cell in that range. The code that +** calls this routine will use that memory range to store keys for +** start and termination conditions of the loop. +** key value of the loop. If one or more IN operators appear, then +** this routine allocates an additional nEq memory cells for internal +** use. +** +** Before returning, *pzAff is set to point to a buffer containing a +** copy of the column affinity string of the index allocated using +** sqlite3DbMalloc(). Except, entries in the copy of the string associated +** with equality constraints that use BLOB or NONE affinity are set to +** SQLITE_AFF_BLOB. This is to deal with SQL such as the following: +** +** CREATE TABLE t1(a TEXT PRIMARY KEY, b); +** SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b; +** +** In the example above, the index on t1(a) has TEXT affinity. But since +** the right hand side of the equality constraint (t2.b) has BLOB/NONE affinity, +** no conversion should be attempted before using a t2.b value as part of +** a key to search the index. Hence the first byte in the returned affinity +** string in this example would be set to SQLITE_AFF_BLOB. */ -static WhereTerm *whereScanInit( - WhereScan *pScan, /* The WhereScan object being initialized */ - WhereClause *pWC, /* The WHERE clause to be scanned */ - int iCur, /* Cursor to scan for */ - int iColumn, /* Column to scan for */ - u32 opMask, /* Operator(s) to scan for */ - Index *pIdx /* Must be compatible with this index */ +static int codeAllEqualityTerms( + Parse *pParse, /* Parsing context */ + WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ + int bRev, /* Reverse the order of IN operators */ + int nExtraReg, /* Number of extra registers to allocate */ + char **pzAff /* OUT: Set to point to affinity string */ ){ - int j; + u16 nEq; /* The number of == or IN constraints to code */ + u16 nSkip; /* Number of left-most columns to skip */ + Vdbe *v = pParse->pVdbe; /* The vm under construction */ + Index *pIdx; /* The index being used for this loop */ + WhereTerm *pTerm; /* A single constraint term */ + WhereLoop *pLoop; /* The WhereLoop object */ + int j; /* Loop counter */ + int regBase; /* Base register */ + int nReg; /* Number of registers to allocate */ + char *zAff; /* Affinity string to return */ - /* memset(pScan, 0, sizeof(*pScan)); */ - pScan->pOrigWC = pWC; - pScan->pWC = pWC; - if( pIdx && iColumn>=0 ){ - pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; - for(j=0; pIdx->aiColumn[j]!=iColumn; j++){ - if( NEVER(j>pIdx->nColumn) ) return 0; - } - pScan->zCollName = pIdx->azColl[j]; - }else{ - pScan->idxaff = 0; - pScan->zCollName = 0; + /* This module is only called on query plans that use an index. */ + pLoop = pLevel->pWLoop; + assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); + nEq = pLoop->u.btree.nEq; + nSkip = pLoop->nSkip; + pIdx = pLoop->u.btree.pIndex; + assert( pIdx!=0 ); + + /* Figure out how many memory cells we will need then allocate them. + */ + regBase = pParse->nMem + 1; + nReg = pLoop->u.btree.nEq + nExtraReg; + pParse->nMem += nReg; + + zAff = sqlite3DbStrDup(pParse->db, sqlite3IndexAffinityStr(v, pIdx)); + if( !zAff ){ + pParse->db->mallocFailed = 1; } - pScan->opMask = opMask; - pScan->k = 0; - pScan->aEquiv[0] = iCur; - pScan->aEquiv[1] = iColumn; - pScan->nEquiv = 2; - pScan->iEquiv = 2; - return whereScanNext(pScan); -} -/* -** Search for a term in the WHERE clause that is of the form "X <op> <expr>" -** where X is a reference to the iColumn of table iCur and <op> is one of -** the WO_xx operator codes specified by the op parameter. -** Return a pointer to the term. Return 0 if not found. -** -** The term returned might by Y=<expr> if there is another constraint in -** the WHERE clause that specifies that X=Y. Any such constraints will be -** identified by the WO_EQUIV bit in the pTerm->eOperator field. The -** aEquiv[] array holds X and all its equivalents, with each SQL variable -** taking up two slots in aEquiv[]. The first slot is for the cursor number -** and the second is for the column number. There are 22 slots in aEquiv[] -** so that means we can look for X plus up to 10 other equivalent values. -** Hence a search for X will return <expr> if X=A1 and A1=A2 and A2=A3 -** and ... and A9=A10 and A10=<expr>. -** -** If there are multiple terms in the WHERE clause of the form "X <op> <expr>" -** then try for the one with no dependencies on <expr> - in other words where -** <expr> is a constant expression of some kind. Only return entries of -** the form "X <op> Y" where Y is a column in another table if no terms of -** the form "X <op> <const-expr>" exist. If no terms with a constant RHS -** exist, try to return a term that does not use WO_EQUIV. -*/ -static WhereTerm *findTerm( - WhereClause *pWC, /* The WHERE clause to be searched */ - int iCur, /* Cursor number of LHS */ - int iColumn, /* Column number of LHS */ - Bitmask notReady, /* RHS must not overlap with this mask */ - u32 op, /* Mask of WO_xx values describing operator */ - Index *pIdx /* Must be compatible with this index, if not NULL */ -){ - WhereTerm *pResult = 0; - WhereTerm *p; - WhereScan scan; + if( nSkip ){ + int iIdxCur = pLevel->iIdxCur; + sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + VdbeComment((v, "begin skip-scan on %s", pIdx->zName)); + j = sqlite3VdbeAddOp0(v, OP_Goto); + pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT), + iIdxCur, 0, regBase, nSkip); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + sqlite3VdbeJumpHere(v, j); + for(j=0; j<nSkip; j++){ + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, j, regBase+j); + assert( pIdx->aiColumn[j]>=0 ); + VdbeComment((v, "%s", pIdx->pTable->aCol[pIdx->aiColumn[j]].zName)); + } + } - p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx); - while( p ){ - if( (p->prereqRight & notReady)==0 ){ - if( p->prereqRight==0 && (p->eOperator&WO_EQ)!=0 ){ - return p; + /* Evaluate the equality constraints + */ + assert( zAff==0 || (int)strlen(zAff)>=nEq ); + for(j=nSkip; j<nEq; j++){ + int r1; + pTerm = pLoop->aLTerm[j]; + assert( pTerm!=0 ); + /* The following testcase is true for indices with redundant columns. + ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */ + testcase( (pTerm->wtFlags & TERM_CODED)!=0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j); + if( r1!=regBase+j ){ + if( nReg==1 ){ + sqlite3ReleaseTempReg(pParse, regBase); + regBase = r1; + }else{ + sqlite3VdbeAddOp2(v, OP_SCopy, r1, regBase+j); + } + } + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_IN ); + if( (pTerm->eOperator & (WO_ISNULL|WO_IN))==0 ){ + Expr *pRight = pTerm->pExpr->pRight; + if( (pTerm->wtFlags & TERM_IS)==0 && sqlite3ExprCanBeNull(pRight) ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk); + VdbeCoverage(v); + } + if( zAff ){ + if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_BLOB ){ + zAff[j] = SQLITE_AFF_BLOB; + } + if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){ + zAff[j] = SQLITE_AFF_BLOB; + } } - if( pResult==0 ) pResult = p; } - p = whereScanNext(&scan); } - return pResult; + *pzAff = zAff; + return regBase; } -/* Forward reference */ -static void exprAnalyze(SrcList*, WhereClause*, int); - /* -** Call exprAnalyze on all terms in a WHERE clause. +** If the most recently coded instruction is a constant range contraint +** that originated from the LIKE optimization, then change the P3 to be +** pLoop->iLikeRepCntr and set P5. +** +** The LIKE optimization trys to evaluate "x LIKE 'abc%'" as a range +** expression: "x>='ABC' AND x<'abd'". But this requires that the range +** scan loop run twice, once for strings and a second time for BLOBs. +** The OP_String opcodes on the second pass convert the upper and lower +** bound string contants to blobs. This routine makes the necessary changes +** to the OP_String opcodes for that to happen. */ -static void exprAnalyzeAll( - SrcList *pTabList, /* the FROM clause */ - WhereClause *pWC /* the WHERE clause to be analyzed */ +static void whereLikeOptimizationStringFixup( + Vdbe *v, /* prepared statement under construction */ + WhereLevel *pLevel, /* The loop that contains the LIKE operator */ + WhereTerm *pTerm /* The upper or lower bound just coded */ ){ - int i; - for(i=pWC->nTerm-1; i>=0; i--){ - exprAnalyze(pTabList, pWC, i); + if( pTerm->wtFlags & TERM_LIKEOPT ){ + VdbeOp *pOp; + assert( pLevel->iLikeRepCntr>0 ); + pOp = sqlite3VdbeGetOp(v, -1); + assert( pOp!=0 ); + assert( pOp->opcode==OP_String8 + || pTerm->pWC->pWInfo->pParse->db->mallocFailed ); + pOp->p3 = pLevel->iLikeRepCntr; + pOp->p5 = 1; } } -#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION + /* -** Check to see if the given expression is a LIKE or GLOB operator that -** can be optimized using inequality constraints. Return TRUE if it is -** so and false if not. -** -** In order for the operator to be optimizible, the RHS must be a string -** literal that does not begin with a wildcard. The LHS must be a column -** that may only be NULL, a string, or a BLOB, never a number. (This means -** that virtual tables cannot participate in the LIKE optimization.) If the -** collating sequence for the column on the LHS must be appropriate for -** the operator. +** Generate code for the start of the iLevel-th loop in the WHERE clause +** implementation described by pWInfo. */ -static int isLikeOrGlob( +SQLITE_PRIVATE Bitmask sqlite3WhereCodeOneLoopStart( + WhereInfo *pWInfo, /* Complete information about the WHERE clause */ + int iLevel, /* Which level of pWInfo->a[] should be coded */ + Bitmask notReady /* Which tables are currently available */ +){ + int j, k; /* Loop counters */ + int iCur; /* The VDBE cursor for the table */ + int addrNxt; /* Where to jump to continue with the next IN case */ + int omitTable; /* True if we use the index only */ + int bRev; /* True if we need to scan in reverse order */ + WhereLevel *pLevel; /* The where level to be coded */ + WhereLoop *pLoop; /* The WhereLoop object being coded */ + WhereClause *pWC; /* Decomposition of the entire WHERE clause */ + WhereTerm *pTerm; /* A WHERE clause term */ + Parse *pParse; /* Parsing context */ + sqlite3 *db; /* Database connection */ + Vdbe *v; /* The prepared stmt under constructions */ + struct SrcList_item *pTabItem; /* FROM clause term being coded */ + int addrBrk; /* Jump here to break out of the loop */ + int addrCont; /* Jump here to continue with next cycle */ + int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ + int iReleaseReg = 0; /* Temp register to free before returning */ + + pParse = pWInfo->pParse; + v = pParse->pVdbe; + pWC = &pWInfo->sWC; + db = pParse->db; + pLevel = &pWInfo->a[iLevel]; + pLoop = pLevel->pWLoop; + pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; + iCur = pTabItem->iCursor; + pLevel->notReady = notReady & ~sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); + bRev = (pWInfo->revMask>>iLevel)&1; + omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 + && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0; + VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); + + /* Create labels for the "break" and "continue" instructions + ** for the current loop. Jump to addrBrk to break out of a loop. + ** Jump to cont to go immediately to the next iteration of the + ** loop. + ** + ** When there is an IN operator, we also have a "addrNxt" label that + ** means to continue with the next IN value combination. When + ** there are no IN operators in the constraints, the "addrNxt" label + ** is the same as "addrBrk". + */ + addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(v); + addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(v); + + /* If this is the right table of a LEFT OUTER JOIN, allocate and + ** initialize a memory cell that records if this table matches any + ** row of the left table of the join. + */ + if( pLevel->iFrom>0 && (pTabItem[0].jointype & JT_LEFT)!=0 ){ + pLevel->iLeftJoin = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin); + VdbeComment((v, "init LEFT JOIN no-match flag")); + } + + /* Special case of a FROM clause subquery implemented as a co-routine */ + if( pTabItem->viaCoroutine ){ + int regYield = pTabItem->regReturn; + sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); + pLevel->p2 = sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk); + VdbeCoverage(v); + VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName)); + pLevel->op = OP_Goto; + }else + +#ifndef SQLITE_OMIT_VIRTUALTABLE + if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ + /* Case 1: The table is a virtual-table. Use the VFilter and VNext + ** to access the data. + */ + int iReg; /* P3 Value for OP_VFilter */ + int addrNotFound; + int nConstraint = pLoop->nLTerm; + + sqlite3ExprCachePush(pParse); + iReg = sqlite3GetTempRange(pParse, nConstraint+2); + addrNotFound = pLevel->addrBrk; + for(j=0; j<nConstraint; j++){ + int iTarget = iReg+j+2; + pTerm = pLoop->aLTerm[j]; + if( pTerm==0 ) continue; + if( pTerm->eOperator & WO_IN ){ + codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget); + addrNotFound = pLevel->addrNxt; + }else{ + sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); + } + } + sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg); + sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1); + sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, + pLoop->u.vtab.idxStr, + pLoop->u.vtab.needFree ? P4_MPRINTF : P4_STATIC); + VdbeCoverage(v); + pLoop->u.vtab.needFree = 0; + for(j=0; j<nConstraint && j<16; j++){ + if( (pLoop->u.vtab.omitMask>>j)&1 ){ + disableTerm(pLevel, pLoop->aLTerm[j]); + } + } + pLevel->op = OP_VNext; + pLevel->p1 = iCur; + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2); + sqlite3ExprCachePop(pParse); + }else +#endif /* SQLITE_OMIT_VIRTUALTABLE */ + + if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0 + ){ + /* Case 2: We can directly reference a single row using an + ** equality comparison against the ROWID field. Or + ** we reference multiple rows using a "rowid IN (...)" + ** construct. + */ + assert( pLoop->u.btree.nEq==1 ); + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->pExpr!=0 ); + assert( omitTable==0 ); + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + iReleaseReg = ++pParse->nMem; + iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); + if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); + addrNxt = pLevel->addrNxt; + sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); VdbeCoverage(v); + sqlite3VdbeAddOp3(v, OP_NotExists, iCur, addrNxt, iRowidReg); + VdbeCoverage(v); + sqlite3ExprCacheAffinityChange(pParse, iRowidReg, 1); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + VdbeComment((v, "pk")); + pLevel->op = OP_Noop; + }else if( (pLoop->wsFlags & WHERE_IPK)!=0 + && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0 + ){ + /* Case 3: We have an inequality comparison against the ROWID field. + */ + int testOp = OP_Noop; + int start; + int memEndValue = 0; + WhereTerm *pStart, *pEnd; + + assert( omitTable==0 ); + j = 0; + pStart = pEnd = 0; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++]; + if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++]; + assert( pStart!=0 || pEnd!=0 ); + if( bRev ){ + pTerm = pStart; + pStart = pEnd; + pEnd = pTerm; + } + if( pStart ){ + Expr *pX; /* The expression that defines the start bound */ + int r1, rTemp; /* Registers for holding the start boundary */ + + /* The following constant maps TK_xx codes into corresponding + ** seek opcodes. It depends on a particular ordering of TK_xx + */ + const u8 aMoveOp[] = { + /* TK_GT */ OP_SeekGT, + /* TK_LE */ OP_SeekLE, + /* TK_LT */ OP_SeekLT, + /* TK_GE */ OP_SeekGE + }; + assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ + assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ + assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + + assert( (pStart->wtFlags & TERM_VNULL)==0 ); + testcase( pStart->wtFlags & TERM_VIRTUAL ); + pX = pStart->pExpr; + assert( pX!=0 ); + testcase( pStart->leftCursor!=iCur ); /* transitive constraints */ + r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp); + sqlite3VdbeAddOp3(v, aMoveOp[pX->op-TK_GT], iCur, addrBrk, r1); + VdbeComment((v, "pk")); + VdbeCoverageIf(v, pX->op==TK_GT); + VdbeCoverageIf(v, pX->op==TK_LE); + VdbeCoverageIf(v, pX->op==TK_LT); + VdbeCoverageIf(v, pX->op==TK_GE); + sqlite3ExprCacheAffinityChange(pParse, r1, 1); + sqlite3ReleaseTempReg(pParse, rTemp); + disableTerm(pLevel, pStart); + }else{ + sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrBrk); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + } + if( pEnd ){ + Expr *pX; + pX = pEnd->pExpr; + assert( pX!=0 ); + assert( (pEnd->wtFlags & TERM_VNULL)==0 ); + testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */ + testcase( pEnd->wtFlags & TERM_VIRTUAL ); + memEndValue = ++pParse->nMem; + sqlite3ExprCode(pParse, pX->pRight, memEndValue); + if( pX->op==TK_LT || pX->op==TK_GT ){ + testOp = bRev ? OP_Le : OP_Ge; + }else{ + testOp = bRev ? OP_Lt : OP_Gt; + } + disableTerm(pLevel, pEnd); + } + start = sqlite3VdbeCurrentAddr(v); + pLevel->op = bRev ? OP_Prev : OP_Next; + pLevel->p1 = iCur; + pLevel->p2 = start; + assert( pLevel->p5==0 ); + if( testOp!=OP_Noop ){ + iRowidReg = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg); + VdbeCoverageIf(v, testOp==OP_Le); + VdbeCoverageIf(v, testOp==OP_Lt); + VdbeCoverageIf(v, testOp==OP_Ge); + VdbeCoverageIf(v, testOp==OP_Gt); + sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL); + } + }else if( pLoop->wsFlags & WHERE_INDEXED ){ + /* Case 4: A scan using an index. + ** + ** The WHERE clause may contain zero or more equality + ** terms ("==" or "IN" operators) that refer to the N + ** left-most columns of the index. It may also contain + ** inequality constraints (>, <, >= or <=) on the indexed + ** column that immediately follows the N equalities. Only + ** the right-most column can be an inequality - the rest must + ** use the "==" and "IN" operators. For example, if the + ** index is on (x,y,z), then the following clauses are all + ** optimized: + ** + ** x=5 + ** x=5 AND y=10 + ** x=5 AND y<10 + ** x=5 AND y>5 AND y<10 + ** x=5 AND y=5 AND z<=10 + ** + ** The z<10 term of the following cannot be used, only + ** the x=5 term: + ** + ** x=5 AND z<10 + ** + ** N may be zero if there are inequality constraints. + ** If there are no inequality constraints, then N is at + ** least one. + ** + ** This case is also used when there are no WHERE clause + ** constraints but an index is selected anyway, in order + ** to force the output order to conform to an ORDER BY. + */ + static const u8 aStartOp[] = { + 0, + 0, + OP_Rewind, /* 2: (!start_constraints && startEq && !bRev) */ + OP_Last, /* 3: (!start_constraints && startEq && bRev) */ + OP_SeekGT, /* 4: (start_constraints && !startEq && !bRev) */ + OP_SeekLT, /* 5: (start_constraints && !startEq && bRev) */ + OP_SeekGE, /* 6: (start_constraints && startEq && !bRev) */ + OP_SeekLE /* 7: (start_constraints && startEq && bRev) */ + }; + static const u8 aEndOp[] = { + OP_IdxGE, /* 0: (end_constraints && !bRev && !endEq) */ + OP_IdxGT, /* 1: (end_constraints && !bRev && endEq) */ + OP_IdxLE, /* 2: (end_constraints && bRev && !endEq) */ + OP_IdxLT, /* 3: (end_constraints && bRev && endEq) */ + }; + u16 nEq = pLoop->u.btree.nEq; /* Number of == or IN terms */ + int regBase; /* Base register holding constraint values */ + WhereTerm *pRangeStart = 0; /* Inequality constraint at range start */ + WhereTerm *pRangeEnd = 0; /* Inequality constraint at range end */ + int startEq; /* True if range start uses ==, >= or <= */ + int endEq; /* True if range end uses ==, >= or <= */ + int start_constraints; /* Start of range is constrained */ + int nConstraint; /* Number of constraint terms */ + Index *pIdx; /* The index we will be using */ + int iIdxCur; /* The VDBE cursor for the index */ + int nExtraReg = 0; /* Number of extra registers needed */ + int op; /* Instruction opcode */ + char *zStartAff; /* Affinity for start of range constraint */ + char cEndAff = 0; /* Affinity for end of range constraint */ + u8 bSeekPastNull = 0; /* True to seek past initial nulls */ + u8 bStopAtNull = 0; /* Add condition to terminate at NULLs */ + + pIdx = pLoop->u.btree.pIndex; + iIdxCur = pLevel->iIdxCur; + assert( nEq>=pLoop->nSkip ); + + /* If this loop satisfies a sort order (pOrderBy) request that + ** was passed to this function to implement a "SELECT min(x) ..." + ** query, then the caller will only allow the loop to run for + ** a single iteration. This means that the first row returned + ** should not have a NULL value stored in 'x'. If column 'x' is + ** the first one after the nEq equality constraints in the index, + ** this requires some special handling. + */ + assert( pWInfo->pOrderBy==0 + || pWInfo->pOrderBy->nExpr==1 + || (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 ); + if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0 + && pWInfo->nOBSat>0 + && (pIdx->nKeyCol>nEq) + ){ + assert( pLoop->nSkip==0 ); + bSeekPastNull = 1; + nExtraReg = 1; + } + + /* Find any inequality constraint terms for the start and end + ** of the range. + */ + j = nEq; + if( pLoop->wsFlags & WHERE_BTM_LIMIT ){ + pRangeStart = pLoop->aLTerm[j++]; + nExtraReg = 1; + /* Like optimization range constraints always occur in pairs */ + assert( (pRangeStart->wtFlags & TERM_LIKEOPT)==0 || + (pLoop->wsFlags & WHERE_TOP_LIMIT)!=0 ); + } + if( pLoop->wsFlags & WHERE_TOP_LIMIT ){ + pRangeEnd = pLoop->aLTerm[j++]; + nExtraReg = 1; + if( (pRangeEnd->wtFlags & TERM_LIKEOPT)!=0 ){ + assert( pRangeStart!=0 ); /* LIKE opt constraints */ + assert( pRangeStart->wtFlags & TERM_LIKEOPT ); /* occur in pairs */ + pLevel->iLikeRepCntr = ++pParse->nMem; + testcase( bRev ); + testcase( pIdx->aSortOrder[nEq]==SQLITE_SO_DESC ); + sqlite3VdbeAddOp2(v, OP_Integer, + bRev ^ (pIdx->aSortOrder[nEq]==SQLITE_SO_DESC), + pLevel->iLikeRepCntr); + VdbeComment((v, "LIKE loop counter")); + pLevel->addrLikeRep = sqlite3VdbeCurrentAddr(v); + } + if( pRangeStart==0 + && (j = pIdx->aiColumn[nEq])>=0 + && pIdx->pTable->aCol[j].notNull==0 + ){ + bSeekPastNull = 1; + } + } + assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 ); + + /* Generate code to evaluate all constraint terms using == or IN + ** and store the values of those terms in an array of registers + ** starting at regBase. + */ + regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff); + assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq ); + if( zStartAff ) cEndAff = zStartAff[nEq]; + addrNxt = pLevel->addrNxt; + + /* If we are doing a reverse order scan on an ascending index, or + ** a forward order scan on a descending index, interchange the + ** start and end terms (pRangeStart and pRangeEnd). + */ + if( (nEq<pIdx->nKeyCol && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC)) + || (bRev && pIdx->nKeyCol==nEq) + ){ + SWAP(WhereTerm *, pRangeEnd, pRangeStart); + SWAP(u8, bSeekPastNull, bStopAtNull); + } + + testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 ); + testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 ); + testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 ); + startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE); + endEq = !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE); + start_constraints = pRangeStart || nEq>0; + + /* Seek the index cursor to the start of the range. */ + nConstraint = nEq; + if( pRangeStart ){ + Expr *pRight = pRangeStart->pExpr->pRight; + sqlite3ExprCode(pParse, pRight, regBase+nEq); + whereLikeOptimizationStringFixup(v, pLevel, pRangeStart); + if( (pRangeStart->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); + } + if( zStartAff ){ + if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_BLOB){ + /* Since the comparison is to be performed with no conversions + ** applied to the operands, set the affinity to apply to pRight to + ** SQLITE_AFF_BLOB. */ + zStartAff[nEq] = SQLITE_AFF_BLOB; + } + if( sqlite3ExprNeedsNoAffinityChange(pRight, zStartAff[nEq]) ){ + zStartAff[nEq] = SQLITE_AFF_BLOB; + } + } + nConstraint++; + testcase( pRangeStart->wtFlags & TERM_VIRTUAL ); + }else if( bSeekPastNull ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + nConstraint++; + startEq = 0; + start_constraints = 1; + } + codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff); + op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; + assert( op!=0 ); + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + VdbeCoverage(v); + VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); + VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); + VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); + VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); + VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); + VdbeCoverageIf(v, op==OP_SeekLT); testcase( op==OP_SeekLT ); + + /* Load the value for the inequality constraint at the end of the + ** range (if any). + */ + nConstraint = nEq; + if( pRangeEnd ){ + Expr *pRight = pRangeEnd->pExpr->pRight; + sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); + sqlite3ExprCode(pParse, pRight, regBase+nEq); + whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); + if( (pRangeEnd->wtFlags & TERM_VNULL)==0 + && sqlite3ExprCanBeNull(pRight) + ){ + sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); + VdbeCoverage(v); + } + if( sqlite3CompareAffinity(pRight, cEndAff)!=SQLITE_AFF_BLOB + && !sqlite3ExprNeedsNoAffinityChange(pRight, cEndAff) + ){ + codeApplyAffinity(pParse, regBase+nEq, 1, &cEndAff); + } + nConstraint++; + testcase( pRangeEnd->wtFlags & TERM_VIRTUAL ); + }else if( bStopAtNull ){ + sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); + endEq = 0; + nConstraint++; + } + sqlite3DbFree(db, zStartAff); + + /* Top of the loop body */ + pLevel->p2 = sqlite3VdbeCurrentAddr(v); + + /* Check if the index cursor is past the end of the range. */ + if( nConstraint ){ + op = aEndOp[bRev*2 + endEq]; + sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); + testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); + testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); + testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); + testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + } + + /* Seek the table cursor, if required */ + disableTerm(pLevel, pRangeStart); + disableTerm(pLevel, pRangeEnd); + if( omitTable ){ + /* pIdx is a covering index. No need to access the main table. */ + }else if( HasRowid(pIdx->pTable) ){ + iRowidReg = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg); + sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); + sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ + }else if( iCur!=iIdxCur ){ + Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); + iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); + for(j=0; j<pPk->nKeyCol; j++){ + k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); + sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j); + } + sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont, + iRowidReg, pPk->nKeyCol); VdbeCoverage(v); + } + + /* Record the instruction used to terminate the loop. Disable + ** WHERE clause terms made redundant by the index range scan. + */ + if( pLoop->wsFlags & WHERE_ONEROW ){ + pLevel->op = OP_Noop; + }else if( bRev ){ + pLevel->op = OP_Prev; + }else{ + pLevel->op = OP_Next; + } + pLevel->p1 = iIdxCur; + pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0; + if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){ + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + }else{ + assert( pLevel->p5==0 ); + } + }else + +#ifndef SQLITE_OMIT_OR_OPTIMIZATION + if( pLoop->wsFlags & WHERE_MULTI_OR ){ + /* Case 5: Two or more separately indexed terms connected by OR + ** + ** Example: + ** + ** CREATE TABLE t1(a,b,c,d); + ** CREATE INDEX i1 ON t1(a); + ** CREATE INDEX i2 ON t1(b); + ** CREATE INDEX i3 ON t1(c); + ** + ** SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13) + ** + ** In the example, there are three indexed terms connected by OR. + ** The top of the loop looks like this: + ** + ** Null 1 # Zero the rowset in reg 1 + ** + ** Then, for each indexed term, the following. The arguments to + ** RowSetTest are such that the rowid of the current row is inserted + ** into the RowSet. If it is already present, control skips the + ** Gosub opcode and jumps straight to the code generated by WhereEnd(). + ** + ** sqlite3WhereBegin(<term>) + ** RowSetTest # Insert rowid into rowset + ** Gosub 2 A + ** sqlite3WhereEnd() + ** + ** Following the above, code to terminate the loop. Label A, the target + ** of the Gosub above, jumps to the instruction right after the Goto. + ** + ** Null 1 # Zero the rowset in reg 1 + ** Goto B # The loop is finished. + ** + ** A: <loop body> # Return data, whatever. + ** + ** Return 2 # Jump back to the Gosub + ** + ** B: <after the loop> + ** + ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then + ** use an ephemeral index instead of a RowSet to record the primary + ** keys of the rows we have already seen. + ** + */ + WhereClause *pOrWc; /* The OR-clause broken out into subterms */ + SrcList *pOrTab; /* Shortened table list or OR-clause generation */ + Index *pCov = 0; /* Potential covering index (or NULL) */ + int iCovCur = pParse->nTab++; /* Cursor used for index scans (if any) */ + + int regReturn = ++pParse->nMem; /* Register used with OP_Gosub */ + int regRowset = 0; /* Register for RowSet object */ + int regRowid = 0; /* Register holding rowid */ + int iLoopBody = sqlite3VdbeMakeLabel(v); /* Start of loop body */ + int iRetInit; /* Address of regReturn init */ + int untestedTerms = 0; /* Some terms not completely tested */ + int ii; /* Loop counter */ + u16 wctrlFlags; /* Flags for sub-WHERE clause */ + Expr *pAndExpr = 0; /* An ".. AND (...)" expression */ + Table *pTab = pTabItem->pTab; + + pTerm = pLoop->aLTerm[0]; + assert( pTerm!=0 ); + assert( pTerm->eOperator & WO_OR ); + assert( (pTerm->wtFlags & TERM_ORINFO)!=0 ); + pOrWc = &pTerm->u.pOrInfo->wc; + pLevel->op = OP_Return; + pLevel->p1 = regReturn; + + /* Set up a new SrcList in pOrTab containing the table being scanned + ** by this loop in the a[0] slot and all notReady tables in a[1..] slots. + ** This becomes the SrcList in the recursive call to sqlite3WhereBegin(). + */ + if( pWInfo->nLevel>1 ){ + int nNotReady; /* The number of notReady tables */ + struct SrcList_item *origSrc; /* Original list of tables */ + nNotReady = pWInfo->nLevel - iLevel - 1; + pOrTab = sqlite3StackAllocRaw(db, + sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); + if( pOrTab==0 ) return notReady; + pOrTab->nAlloc = (u8)(nNotReady + 1); + pOrTab->nSrc = pOrTab->nAlloc; + memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem)); + origSrc = pWInfo->pTabList->a; + for(k=1; k<=nNotReady; k++){ + memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k])); + } + }else{ + pOrTab = pWInfo->pTabList; + } + + /* Initialize the rowset register to contain NULL. An SQL NULL is + ** equivalent to an empty rowset. Or, create an ephemeral index + ** capable of holding primary keys in the case of a WITHOUT ROWID. + ** + ** Also initialize regReturn to contain the address of the instruction + ** immediately following the OP_Return at the bottom of the loop. This + ** is required in a few obscure LEFT JOIN cases where control jumps + ** over the top of the loop into the body of it. In this case the + ** correct response for the end-of-loop code (the OP_Return) is to + ** fall through to the next instruction, just as an OP_Next does if + ** called on an uninitialized cursor. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + if( HasRowid(pTab) ){ + regRowset = ++pParse->nMem; + sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + regRowset = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol); + sqlite3VdbeSetP4KeyInfo(pParse, pPk); + } + regRowid = ++pParse->nMem; + } + iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn); + + /* If the original WHERE clause is z of the form: (x1 OR x2 OR ...) AND y + ** Then for every term xN, evaluate as the subexpression: xN AND z + ** That way, terms in y that are factored into the disjunction will + ** be picked up by the recursive calls to sqlite3WhereBegin() below. + ** + ** Actually, each subexpression is converted to "xN AND w" where w is + ** the "interesting" terms of z - terms that did not originate in the + ** ON or USING clause of a LEFT JOIN, and terms that are usable as + ** indices. + ** + ** This optimization also only applies if the (x1 OR x2 OR ...) term + ** is not contained in the ON clause of a LEFT JOIN. + ** See ticket http://www.sqlite.org/src/info/f2369304e4 + */ + if( pWC->nTerm>1 ){ + int iTerm; + for(iTerm=0; iTerm<pWC->nTerm; iTerm++){ + Expr *pExpr = pWC->a[iTerm].pExpr; + if( &pWC->a[iTerm] == pTerm ) continue; + if( ExprHasProperty(pExpr, EP_FromJoin) ) continue; + if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue; + if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; + testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); + pExpr = sqlite3ExprDup(db, pExpr, 0); + pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr); + } + if( pAndExpr ){ + pAndExpr = sqlite3PExpr(pParse, TK_AND, 0, pAndExpr, 0); + } + } + + /* Run a separate WHERE clause for each term of the OR clause. After + ** eliminating duplicates from other WHERE clauses, the action for each + ** sub-WHERE clause is to to invoke the main loop body as a subroutine. + */ + wctrlFlags = WHERE_OMIT_OPEN_CLOSE + | WHERE_FORCE_TABLE + | WHERE_ONETABLE_ONLY + | WHERE_NO_AUTOINDEX; + for(ii=0; ii<pOrWc->nTerm; ii++){ + WhereTerm *pOrTerm = &pOrWc->a[ii]; + if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ + WhereInfo *pSubWInfo; /* Info for single OR-term scan */ + Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */ + int j1 = 0; /* Address of jump operation */ + if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ + pAndExpr->pLeft = pOrExpr; + pOrExpr = pAndExpr; + } + /* Loop through table entries that match term pOrTerm. */ + WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); + pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, + wctrlFlags, iCovCur); + assert( pSubWInfo || pParse->nErr || db->mallocFailed ); + if( pSubWInfo ){ + WhereLoop *pSubLoop; + int addrExplain = sqlite3WhereExplainOneScan( + pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 + ); + sqlite3WhereAddScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); + + /* This is the sub-WHERE clause body. First skip over + ** duplicate rows from prior sub-WHERE clauses, and record the + ** rowid (or PRIMARY KEY) for the current row so that the same + ** row will be skipped in subsequent sub-WHERE clauses. + */ + if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ + int r; + int iSet = ((ii==pOrWc->nTerm-1)?-1:ii); + if( HasRowid(pTab) ){ + r = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, regRowid, 0); + j1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, r,iSet); + VdbeCoverage(v); + }else{ + Index *pPk = sqlite3PrimaryKeyIndex(pTab); + int nPk = pPk->nKeyCol; + int iPk; + + /* Read the PK into an array of temp registers. */ + r = sqlite3GetTempRange(pParse, nPk); + for(iPk=0; iPk<nPk; iPk++){ + int iCol = pPk->aiColumn[iPk]; + int rx; + rx = sqlite3ExprCodeGetColumn(pParse, pTab, iCol, iCur,r+iPk,0); + if( rx!=r+iPk ){ + sqlite3VdbeAddOp2(v, OP_SCopy, rx, r+iPk); + } + } + + /* Check if the temp table already contains this key. If so, + ** the row has already been included in the result set and + ** can be ignored (by jumping past the Gosub below). Otherwise, + ** insert the key into the temp table and proceed with processing + ** the row. + ** + ** Use some of the same optimizations as OP_RowSetTest: If iSet + ** is zero, assume that the key cannot already be present in + ** the temp table. And if iSet is -1, assume that there is no + ** need to insert the key into the temp table, as it will never + ** be tested for. */ + if( iSet ){ + j1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk); + VdbeCoverage(v); + } + if( iSet>=0 ){ + sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid); + sqlite3VdbeAddOp3(v, OP_IdxInsert, regRowset, regRowid, 0); + if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + } + + /* Release the array of temp registers */ + sqlite3ReleaseTempRange(pParse, r, nPk); + } + } + + /* Invoke the main loop body as a subroutine */ + sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody); + + /* Jump here (skipping the main loop body subroutine) if the + ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */ + if( j1 ) sqlite3VdbeJumpHere(v, j1); + + /* The pSubWInfo->untestedTerms flag means that this OR term + ** contained one or more AND term from a notReady table. The + ** terms from the notReady table could not be tested and will + ** need to be tested later. + */ + if( pSubWInfo->untestedTerms ) untestedTerms = 1; + + /* If all of the OR-connected terms are optimized using the same + ** index, and the index is opened using the same cursor number + ** by each call to sqlite3WhereBegin() made by this loop, it may + ** be possible to use that index as a covering index. + ** + ** If the call to sqlite3WhereBegin() above resulted in a scan that + ** uses an index, and this is either the first OR-connected term + ** processed or the index is the same as that used by all previous + ** terms, set pCov to the candidate covering index. Otherwise, set + ** pCov to NULL to indicate that no candidate covering index will + ** be available. + */ + pSubLoop = pSubWInfo->a[0].pWLoop; + assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); + if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0 + && (ii==0 || pSubLoop->u.btree.pIndex==pCov) + && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex)) + ){ + assert( pSubWInfo->a[0].iIdxCur==iCovCur ); + pCov = pSubLoop->u.btree.pIndex; + wctrlFlags |= WHERE_REOPEN_IDX; + }else{ + pCov = 0; + } + + /* Finish the loop through table entries that match term pOrTerm. */ + sqlite3WhereEnd(pSubWInfo); + } + } + } + pLevel->u.pCovidx = pCov; + if( pCov ) pLevel->iIdxCur = iCovCur; + if( pAndExpr ){ + pAndExpr->pLeft = 0; + sqlite3ExprDelete(db, pAndExpr); + } + sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v)); + sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk); + sqlite3VdbeResolveLabel(v, iLoopBody); + + if( pWInfo->nLevel>1 ) sqlite3StackFree(db, pOrTab); + if( !untestedTerms ) disableTerm(pLevel, pTerm); + }else +#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + + { + /* Case 6: There is no usable index. We must do a complete + ** scan of the entire table. + */ + static const u8 aStep[] = { OP_Next, OP_Prev }; + static const u8 aStart[] = { OP_Rewind, OP_Last }; + assert( bRev==0 || bRev==1 ); + if( pTabItem->isRecursive ){ + /* Tables marked isRecursive have only a single row that is stored in + ** a pseudo-cursor. No need to Rewind or Next such cursors. */ + pLevel->op = OP_Noop; + }else{ + pLevel->op = aStep[bRev]; + pLevel->p1 = iCur; + pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); + VdbeCoverageIf(v, bRev==0); + VdbeCoverageIf(v, bRev!=0); + pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + } + } + +#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); +#endif + + /* Insert code to test every subexpression that can be completely + ** computed using the current set of tables. + */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE; + int skipLikeAddr = 0; + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + testcase( pWInfo->untestedTerms==0 + && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); + pWInfo->untestedTerms = 1; + continue; + } + pE = pTerm->pExpr; + assert( pE!=0 ); + if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ + continue; + } + if( pTerm->wtFlags & TERM_LIKECOND ){ + assert( pLevel->iLikeRepCntr>0 ); + skipLikeAddr = sqlite3VdbeAddOp1(v, OP_IfNot, pLevel->iLikeRepCntr); + VdbeCoverage(v); + } + sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); + if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); + pTerm->wtFlags |= TERM_CODED; + } + + /* Insert code to test for implied constraints based on transitivity + ** of the "==" operator. + ** + ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" + ** and we are coding the t1 loop and the t2 loop has not yet coded, + ** then we cannot use the "t1.a=t2.b" constraint, but we can code + ** the implied "t1.a=123" constraint. + */ + for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ + Expr *pE, *pEAlt; + WhereTerm *pAlt; + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) continue; + if( (pTerm->eOperator & WO_EQUIV)==0 ) continue; + if( pTerm->leftCursor!=iCur ) continue; + if( pLevel->iLeftJoin ) continue; + pE = pTerm->pExpr; + assert( !ExprHasProperty(pE, EP_FromJoin) ); + assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); + pAlt = sqlite3WhereFindTerm(pWC, iCur, pTerm->u.leftColumn, notReady, + WO_EQ|WO_IN|WO_IS, 0); + if( pAlt==0 ) continue; + if( pAlt->wtFlags & (TERM_CODED) ) continue; + testcase( pAlt->eOperator & WO_EQ ); + testcase( pAlt->eOperator & WO_IS ); + testcase( pAlt->eOperator & WO_IN ); + VdbeModuleComment((v, "begin transitive constraint")); + pEAlt = sqlite3StackAllocRaw(db, sizeof(*pEAlt)); + if( pEAlt ){ + *pEAlt = *pAlt->pExpr; + pEAlt->pLeft = pE->pLeft; + sqlite3ExprIfFalse(pParse, pEAlt, addrCont, SQLITE_JUMPIFNULL); + sqlite3StackFree(db, pEAlt); + } + } + + /* For a LEFT OUTER JOIN, generate code that will record the fact that + ** at least one row of the right table has matched the left table. + */ + if( pLevel->iLeftJoin ){ + pLevel->addrFirst = sqlite3VdbeCurrentAddr(v); + sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin); + VdbeComment((v, "record LEFT JOIN hit")); + sqlite3ExprCacheClear(pParse); + for(pTerm=pWC->a, j=0; j<pWC->nTerm; j++, pTerm++){ + testcase( pTerm->wtFlags & TERM_VIRTUAL ); + testcase( pTerm->wtFlags & TERM_CODED ); + if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; + if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ + assert( pWInfo->untestedTerms ); + continue; + } + assert( pTerm->pExpr ); + sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); + pTerm->wtFlags |= TERM_CODED; + } + } + + return pLevel->notReady; +} + +/************** End of wherecode.c *******************************************/ +/************** Begin file whereexpr.c ***************************************/ +/* +** 2015-06-08 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. +** +** This file was originally part of where.c but was split out to improve +** readability and editabiliity. This file contains utility routines for +** analyzing Expr objects in the WHERE clause. +*/ +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ + +/* Forward declarations */ +static void exprAnalyze(SrcList*, WhereClause*, int); + +/* +** Deallocate all memory associated with a WhereOrInfo object. +*/ +static void whereOrInfoDelete(sqlite3 *db, WhereOrInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} + +/* +** Deallocate all memory associated with a WhereAndInfo object. +*/ +static void whereAndInfoDelete(sqlite3 *db, WhereAndInfo *p){ + sqlite3WhereClauseClear(&p->wc); + sqlite3DbFree(db, p); +} + +/* +** Add a single new WhereTerm entry to the WhereClause object pWC. +** The new WhereTerm object is constructed from Expr p and with wtFlags. +** The index in pWC->a[] of the new WhereTerm is returned on success. +** 0 is returned if the new WhereTerm could not be added due to a memory +** allocation error. The memory allocation failure will be recorded in +** the db->mallocFailed flag so that higher-level functions can detect it. +** +** This routine will increase the size of the pWC->a[] array as necessary. +** +** If the wtFlags argument includes TERM_DYNAMIC, then responsibility +** for freeing the expression p is assumed by the WhereClause object pWC. +** This is true even if this routine fails to allocate a new WhereTerm. +** +** WARNING: This routine might reallocate the space used to store +** WhereTerms. All pointers to WhereTerms should be invalidated after +** calling this routine. Such pointers may be reinitialized by referencing +** the pWC->a[] array. +*/ +static int whereClauseInsert(WhereClause *pWC, Expr *p, u16 wtFlags){ + WhereTerm *pTerm; + int idx; + testcase( wtFlags & TERM_VIRTUAL ); + if( pWC->nTerm>=pWC->nSlot ){ + WhereTerm *pOld = pWC->a; + sqlite3 *db = pWC->pWInfo->pParse->db; + pWC->a = sqlite3DbMallocRaw(db, sizeof(pWC->a[0])*pWC->nSlot*2 ); + if( pWC->a==0 ){ + if( wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, p); + } + pWC->a = pOld; + return 0; + } + memcpy(pWC->a, pOld, sizeof(pWC->a[0])*pWC->nTerm); + if( pOld!=pWC->aStatic ){ + sqlite3DbFree(db, pOld); + } + pWC->nSlot = sqlite3DbMallocSize(db, pWC->a)/sizeof(pWC->a[0]); + memset(&pWC->a[pWC->nTerm], 0, sizeof(pWC->a[0])*(pWC->nSlot-pWC->nTerm)); + } + pTerm = &pWC->a[idx = pWC->nTerm++]; + if( p && ExprHasProperty(p, EP_Unlikely) ){ + pTerm->truthProb = sqlite3LogEst(p->iTable) - 270; + }else{ + pTerm->truthProb = 1; + } + pTerm->pExpr = sqlite3ExprSkipCollate(p); + pTerm->wtFlags = wtFlags; + pTerm->pWC = pWC; + pTerm->iParent = -1; + return idx; +} + +/* +** Return TRUE if the given operator is one of the operators that is +** allowed for an indexable WHERE clause term. The allowed operators are +** "=", "<", ">", "<=", ">=", "IN", and "IS NULL" +*/ +static int allowedOp(int op){ + assert( TK_GT>TK_EQ && TK_GT<TK_GE ); + assert( TK_LT>TK_EQ && TK_LT<TK_GE ); + assert( TK_LE>TK_EQ && TK_LE<TK_GE ); + assert( TK_GE==TK_EQ+4 ); + return op==TK_IN || (op>=TK_EQ && op<=TK_GE) || op==TK_ISNULL || op==TK_IS; +} + +/* +** Commute a comparison operator. Expressions of the form "X op Y" +** are converted into "Y op X". +** +** If left/right precedence rules come into play when determining the +** collating sequence, then COLLATE operators are adjusted to ensure +** that the collating sequence does not change. For example: +** "Y collate NOCASE op X" becomes "X op Y" because any collation sequence on +** the left hand side of a comparison overrides any collation sequence +** attached to the right. For the same reason the EP_Collate flag +** is not commuted. +*/ +static void exprCommute(Parse *pParse, Expr *pExpr){ + u16 expRight = (pExpr->pRight->flags & EP_Collate); + u16 expLeft = (pExpr->pLeft->flags & EP_Collate); + assert( allowedOp(pExpr->op) && pExpr->op!=TK_IN ); + if( expRight==expLeft ){ + /* Either X and Y both have COLLATE operator or neither do */ + if( expRight ){ + /* Both X and Y have COLLATE operators. Make sure X is always + ** used by clearing the EP_Collate flag from Y. */ + pExpr->pRight->flags &= ~EP_Collate; + }else if( sqlite3ExprCollSeq(pParse, pExpr->pLeft)!=0 ){ + /* Neither X nor Y have COLLATE operators, but X has a non-default + ** collating sequence. So add the EP_Collate marker on X to cause + ** it to be searched first. */ + pExpr->pLeft->flags |= EP_Collate; + } + } + SWAP(Expr*,pExpr->pRight,pExpr->pLeft); + if( pExpr->op>=TK_GT ){ + assert( TK_LT==TK_GT+2 ); + assert( TK_GE==TK_LE+2 ); + assert( TK_GT>TK_EQ ); + assert( TK_GT<TK_LE ); + assert( pExpr->op>=TK_GT && pExpr->op<=TK_GE ); + pExpr->op = ((pExpr->op-TK_GT)^2)+TK_GT; + } +} + +/* +** Translate from TK_xx operator to WO_xx bitmask. +*/ +static u16 operatorMask(int op){ + u16 c; + assert( allowedOp(op) ); + if( op==TK_IN ){ + c = WO_IN; + }else if( op==TK_ISNULL ){ + c = WO_ISNULL; + }else if( op==TK_IS ){ + c = WO_IS; + }else{ + assert( (WO_EQ<<(op-TK_EQ)) < 0x7fff ); + c = (u16)(WO_EQ<<(op-TK_EQ)); + } + assert( op!=TK_ISNULL || c==WO_ISNULL ); + assert( op!=TK_IN || c==WO_IN ); + assert( op!=TK_EQ || c==WO_EQ ); + assert( op!=TK_LT || c==WO_LT ); + assert( op!=TK_LE || c==WO_LE ); + assert( op!=TK_GT || c==WO_GT ); + assert( op!=TK_GE || c==WO_GE ); + assert( op!=TK_IS || c==WO_IS ); + return c; +} + + +#ifndef SQLITE_OMIT_LIKE_OPTIMIZATION +/* +** Check to see if the given expression is a LIKE or GLOB operator that +** can be optimized using inequality constraints. Return TRUE if it is +** so and false if not. +** +** In order for the operator to be optimizible, the RHS must be a string +** literal that does not begin with a wildcard. The LHS must be a column +** that may only be NULL, a string, or a BLOB, never a number. (This means +** that virtual tables cannot participate in the LIKE optimization.) The +** collating sequence for the column on the LHS must be appropriate for +** the operator. +*/ +static int isLikeOrGlob( Parse *pParse, /* Parsing and code generating context */ Expr *pExpr, /* Test this expression */ Expr **ppPrefix, /* Pointer to TK_STRING expression with pattern prefix */ @@ -116847,7 +119133,7 @@ static int isLikeOrGlob( if( op==TK_VARIABLE ){ Vdbe *pReprepare = pParse->pReprepare; int iCol = pRight->iColumn; - pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_NONE); + pVal = sqlite3VdbeGetBoundValue(pReprepare, iCol, SQLITE_AFF_BLOB); if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){ z = (char *)sqlite3_value_text(pVal); } @@ -117058,7 +119344,7 @@ static void whereCombineDisjuncts( ** ** CASE 2: ** -** If there are exactly two disjuncts one side has x>A and the other side +** If there are exactly two disjuncts and one side has x>A and the other side ** has x=A (for the same x and A) then add a new virtual conjunct term to the ** WHERE clause of the form "x>=A". Example: ** @@ -117087,22 +119373,22 @@ static void whereCombineDisjuncts( ** is decided elsewhere. This analysis only looks at whether subterms ** appropriate for indexing exist. ** -** All examples A through E above satisfy case 2. But if a term +** All examples A through E above satisfy case 3. But if a term ** also satisfies case 1 (such as B) we know that the optimizer will -** always prefer case 1, so in that case we pretend that case 2 is not +** always prefer case 1, so in that case we pretend that case 3 is not ** satisfied. ** ** It might be the case that multiple tables are indexable. For example, ** (E) above is indexable on tables P, Q, and R. ** -** Terms that satisfy case 2 are candidates for lookup by using +** Terms that satisfy case 3 are candidates for lookup by using ** separate indices to find rowids for each subterm and composing ** the union of all rowids using a RowSet object. This is similar ** to "bitmap indices" in other database engines. ** ** OTHERWISE: ** -** If neither case 1 nor case 2 apply, then leave the eOperator set to +** If none of cases 1, 2, or 3 apply, then leave the eOperator set to ** zero. This term is not useful for search. */ static void exprAnalyzeOrTerm( @@ -117133,14 +119419,14 @@ static void exprAnalyzeOrTerm( if( pOrInfo==0 ) return; pTerm->wtFlags |= TERM_ORINFO; pOrWc = &pOrInfo->wc; - whereClauseInit(pOrWc, pWInfo); - whereSplit(pOrWc, pExpr, TK_OR); - exprAnalyzeAll(pSrc, pOrWc); + sqlite3WhereClauseInit(pOrWc, pWInfo); + sqlite3WhereSplit(pOrWc, pExpr, TK_OR); + sqlite3WhereExprAnalyze(pSrc, pOrWc); if( db->mallocFailed ) return; assert( pOrWc->nTerm>=2 ); /* - ** Compute the set of tables that might satisfy cases 1 or 2. + ** Compute the set of tables that might satisfy cases 1 or 3. */ indexable = ~(Bitmask)0; chngToIN = ~(Bitmask)0; @@ -117159,16 +119445,16 @@ static void exprAnalyzeOrTerm( pOrTerm->wtFlags |= TERM_ANDINFO; pOrTerm->eOperator = WO_AND; pAndWC = &pAndInfo->wc; - whereClauseInit(pAndWC, pWC->pWInfo); - whereSplit(pAndWC, pOrTerm->pExpr, TK_AND); - exprAnalyzeAll(pSrc, pAndWC); + sqlite3WhereClauseInit(pAndWC, pWC->pWInfo); + sqlite3WhereSplit(pAndWC, pOrTerm->pExpr, TK_AND); + sqlite3WhereExprAnalyze(pSrc, pAndWC); pAndWC->pOuter = pWC; testcase( db->mallocFailed ); if( !db->mallocFailed ){ for(j=0, pAndTerm=pAndWC->a; j<pAndWC->nTerm; j++, pAndTerm++){ assert( pAndTerm->pExpr ); if( allowedOp(pAndTerm->pExpr->op) ){ - b |= getMask(&pWInfo->sMaskSet, pAndTerm->leftCursor); + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pAndTerm->leftCursor); } } } @@ -117179,10 +119465,10 @@ static void exprAnalyzeOrTerm( ** corresponding TERM_VIRTUAL term */ }else{ Bitmask b; - b = getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor); + b = sqlite3WhereGetMask(&pWInfo->sMaskSet, pOrTerm->leftCursor); if( pOrTerm->wtFlags & TERM_VIRTUAL ){ WhereTerm *pOther = &pOrWc->a[pOrTerm->iParent]; - b |= getMask(&pWInfo->sMaskSet, pOther->leftCursor); + b |= sqlite3WhereGetMask(&pWInfo->sMaskSet, pOther->leftCursor); } indexable &= b; if( (pOrTerm->eOperator & WO_EQ)==0 ){ @@ -117258,7 +119544,8 @@ static void exprAnalyzeOrTerm( assert( j==1 ); continue; } - if( (chngToIN & getMask(&pWInfo->sMaskSet, pOrTerm->leftCursor))==0 ){ + if( (chngToIN & sqlite3WhereGetMask(&pWInfo->sMaskSet, + pOrTerm->leftCursor))==0 ){ /* This term must be of the form t1.a==t2.b where t2 is in the ** chngToIN set but t1 is not. This term will be either preceded ** or follwed by an inverted copy (t2.b==t1.a). Skip this term @@ -117277,7 +119564,7 @@ static void exprAnalyzeOrTerm( ** on the second iteration */ assert( j==1 ); assert( IsPowerOfTwo(chngToIN) ); - assert( chngToIN==getMask(&pWInfo->sMaskSet, iCursor) ); + assert( chngToIN==sqlite3WhereGetMask(&pWInfo->sMaskSet, iCursor) ); break; } testcase( j==1 ); @@ -117349,6 +119636,72 @@ static void exprAnalyzeOrTerm( } #endif /* !SQLITE_OMIT_OR_OPTIMIZATION && !SQLITE_OMIT_SUBQUERY */ +/* +** We already know that pExpr is a binary operator where both operands are +** column references. This routine checks to see if pExpr is an equivalence +** relation: +** 1. The SQLITE_Transitive optimization must be enabled +** 2. Must be either an == or an IS operator +** 3. Not originating in the ON clause of an OUTER JOIN +** 4. The affinities of A and B must be compatible +** 5a. Both operands use the same collating sequence OR +** 5b. The overall collating sequence is BINARY +** If this routine returns TRUE, that means that the RHS can be substituted +** for the LHS anyplace else in the WHERE clause where the LHS column occurs. +** This is an optimization. No harm comes from returning 0. But if 1 is +** returned when it should not be, then incorrect answers might result. +*/ +static int termIsEquivalence(Parse *pParse, Expr *pExpr){ + char aff1, aff2; + CollSeq *pColl; + const char *zColl1, *zColl2; + if( !OptimizationEnabled(pParse->db, SQLITE_Transitive) ) return 0; + if( pExpr->op!=TK_EQ && pExpr->op!=TK_IS ) return 0; + if( ExprHasProperty(pExpr, EP_FromJoin) ) return 0; + aff1 = sqlite3ExprAffinity(pExpr->pLeft); + aff2 = sqlite3ExprAffinity(pExpr->pRight); + if( aff1!=aff2 + && (!sqlite3IsNumericAffinity(aff1) || !sqlite3IsNumericAffinity(aff2)) + ){ + return 0; + } + pColl = sqlite3BinaryCompareCollSeq(pParse, pExpr->pLeft, pExpr->pRight); + if( pColl==0 || sqlite3StrICmp(pColl->zName, "BINARY")==0 ) return 1; + pColl = sqlite3ExprCollSeq(pParse, pExpr->pLeft); + /* Since pLeft and pRight are both a column references, their collating + ** sequence should always be defined. */ + zColl1 = ALWAYS(pColl) ? pColl->zName : 0; + pColl = sqlite3ExprCollSeq(pParse, pExpr->pRight); + zColl2 = ALWAYS(pColl) ? pColl->zName : 0; + return sqlite3StrICmp(zColl1, zColl2)==0; +} + +/* +** Recursively walk the expressions of a SELECT statement and generate +** a bitmask indicating which tables are used in that expression +** tree. +*/ +static Bitmask exprSelectUsage(WhereMaskSet *pMaskSet, Select *pS){ + Bitmask mask = 0; + while( pS ){ + SrcList *pSrc = pS->pSrc; + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pEList); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pGroupBy); + mask |= sqlite3WhereExprListUsage(pMaskSet, pS->pOrderBy); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pWhere); + mask |= sqlite3WhereExprUsage(pMaskSet, pS->pHaving); + if( ALWAYS(pSrc!=0) ){ + int i; + for(i=0; i<pSrc->nSrc; i++){ + mask |= exprSelectUsage(pMaskSet, pSrc->a[i].pSelect); + mask |= sqlite3WhereExprUsage(pMaskSet, pSrc->a[i].pOn); + } + } + pS = pS->pPrior; + } + return mask; +} + /* ** The input to this routine is an WhereTerm structure with only the ** "pExpr" field filled in. The job of this routine is to analyze the @@ -117393,23 +119746,23 @@ static void exprAnalyze( pMaskSet = &pWInfo->sMaskSet; pExpr = pTerm->pExpr; assert( pExpr->op!=TK_AS && pExpr->op!=TK_COLLATE ); - prereqLeft = exprTableUsage(pMaskSet, pExpr->pLeft); + prereqLeft = sqlite3WhereExprUsage(pMaskSet, pExpr->pLeft); op = pExpr->op; if( op==TK_IN ){ assert( pExpr->pRight==0 ); if( ExprHasProperty(pExpr, EP_xIsSelect) ){ - pTerm->prereqRight = exprSelectTableUsage(pMaskSet, pExpr->x.pSelect); + pTerm->prereqRight = exprSelectUsage(pMaskSet, pExpr->x.pSelect); }else{ - pTerm->prereqRight = exprListTableUsage(pMaskSet, pExpr->x.pList); + pTerm->prereqRight = sqlite3WhereExprListUsage(pMaskSet, pExpr->x.pList); } }else if( op==TK_ISNULL ){ pTerm->prereqRight = 0; }else{ - pTerm->prereqRight = exprTableUsage(pMaskSet, pExpr->pRight); + pTerm->prereqRight = sqlite3WhereExprUsage(pMaskSet, pExpr->pRight); } - prereqAll = exprTableUsage(pMaskSet, pExpr); + prereqAll = sqlite3WhereExprUsage(pMaskSet, pExpr); if( ExprHasProperty(pExpr, EP_FromJoin) ){ - Bitmask x = getMask(pMaskSet, pExpr->iRightJoinTable); + Bitmask x = sqlite3WhereGetMask(pMaskSet, pExpr->iRightJoinTable); prereqAll |= x; extraRight = x-1; /* ON clause terms may not be used with an index ** on left table of a LEFT JOIN. Ticket #3015 */ @@ -117427,6 +119780,7 @@ static void exprAnalyze( pTerm->u.leftColumn = pLeft->iColumn; pTerm->eOperator = operatorMask(op) & opMask; } + if( op==TK_IS ) pTerm->wtFlags |= TERM_IS; if( pRight && pRight->op==TK_COLUMN ){ WhereTerm *pNew; Expr *pDup; @@ -117442,12 +119796,11 @@ static void exprAnalyze( if( idxNew==0 ) return; pNew = &pWC->a[idxNew]; markTermAsChild(pWC, idxNew, idxTerm); + if( op==TK_IS ) pNew->wtFlags |= TERM_IS; pTerm = &pWC->a[idxTerm]; pTerm->wtFlags |= TERM_COPIED; - if( pExpr->op==TK_EQ - && !ExprHasProperty(pExpr, EP_FromJoin) - && OptimizationEnabled(db, SQLITE_Transitive) - ){ + + if( termIsEquivalence(pParse, pDup) ){ pTerm->eOperator |= WO_EQUIV; eExtraOp = WO_EQUIV; } @@ -117614,8 +119967,8 @@ static void exprAnalyze( pRight = pExpr->x.pList->a[0].pExpr; pLeft = pExpr->x.pList->a[1].pExpr; - prereqExpr = exprTableUsage(pMaskSet, pRight); - prereqColumn = exprTableUsage(pMaskSet, pLeft); + prereqExpr = sqlite3WhereExprUsage(pMaskSet, pRight); + prereqColumn = sqlite3WhereExprUsage(pMaskSet, pLeft); if( (prereqExpr & prereqColumn)==0 ){ Expr *pNewExpr; pNewExpr = sqlite3PExpr(pParse, TK_MATCH, @@ -117641,2682 +119994,1722 @@ static void exprAnalyze( ** as "x>NULL" if x is not an INTEGER PRIMARY KEY. So construct a ** virtual term of that form. ** - ** Note that the virtual term must be tagged with TERM_VNULL. This - ** TERM_VNULL tag will suppress the not-null check at the beginning - ** of the loop. Without the TERM_VNULL flag, the not-null check at - ** the start of the loop will prevent any results from being returned. + ** Note that the virtual term must be tagged with TERM_VNULL. */ if( pExpr->op==TK_NOTNULL && pExpr->pLeft->op==TK_COLUMN - && pExpr->pLeft->iColumn>=0 - && OptimizationEnabled(db, SQLITE_Stat34) - ){ - Expr *pNewExpr; - Expr *pLeft = pExpr->pLeft; - int idxNew; - WhereTerm *pNewTerm; - - pNewExpr = sqlite3PExpr(pParse, TK_GT, - sqlite3ExprDup(db, pLeft, 0), - sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); - - idxNew = whereClauseInsert(pWC, pNewExpr, - TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); - if( idxNew ){ - pNewTerm = &pWC->a[idxNew]; - pNewTerm->prereqRight = 0; - pNewTerm->leftCursor = pLeft->iTable; - pNewTerm->u.leftColumn = pLeft->iColumn; - pNewTerm->eOperator = WO_GT; - markTermAsChild(pWC, idxNew, idxTerm); - pTerm = &pWC->a[idxTerm]; - pTerm->wtFlags |= TERM_COPIED; - pNewTerm->prereqAll = pTerm->prereqAll; - } - } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - - /* Prevent ON clause terms of a LEFT JOIN from being used to drive - ** an index for tables to the left of the join. - */ - pTerm->prereqRight |= extraRight; -} - -/* -** This function searches pList for an entry that matches the iCol-th column -** of index pIdx. -** -** If such an expression is found, its index in pList->a[] is returned. If -** no expression is found, -1 is returned. -*/ -static int findIndexCol( - Parse *pParse, /* Parse context */ - ExprList *pList, /* Expression list to search */ - int iBase, /* Cursor for table associated with pIdx */ - Index *pIdx, /* Index to match column of */ - int iCol /* Column of index to match */ -){ - int i; - const char *zColl = pIdx->azColl[iCol]; - - for(i=0; i<pList->nExpr; i++){ - Expr *p = sqlite3ExprSkipCollate(pList->a[i].pExpr); - if( p->op==TK_COLUMN - && p->iColumn==pIdx->aiColumn[iCol] - && p->iTable==iBase - ){ - CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[i].pExpr); - if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ - return i; - } - } - } - - return -1; -} - -/* -** Return true if the DISTINCT expression-list passed as the third argument -** is redundant. -** -** A DISTINCT list is redundant if the database contains some subset of -** columns that are unique and non-null. -*/ -static int isDistinctRedundant( - Parse *pParse, /* Parsing context */ - SrcList *pTabList, /* The FROM clause */ - WhereClause *pWC, /* The WHERE clause */ - ExprList *pDistinct /* The result set that needs to be DISTINCT */ -){ - Table *pTab; - Index *pIdx; - int i; - int iBase; - - /* If there is more than one table or sub-select in the FROM clause of - ** this query, then it will not be possible to show that the DISTINCT - ** clause is redundant. */ - if( pTabList->nSrc!=1 ) return 0; - iBase = pTabList->a[0].iCursor; - pTab = pTabList->a[0].pTab; - - /* If any of the expressions is an IPK column on table iBase, then return - ** true. Note: The (p->iTable==iBase) part of this test may be false if the - ** current SELECT is a correlated sub-query. - */ - for(i=0; i<pDistinct->nExpr; i++){ - Expr *p = sqlite3ExprSkipCollate(pDistinct->a[i].pExpr); - if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; - } - - /* Loop through all indices on the table, checking each to see if it makes - ** the DISTINCT qualifier redundant. It does so if: - ** - ** 1. The index is itself UNIQUE, and - ** - ** 2. All of the columns in the index are either part of the pDistinct - ** list, or else the WHERE clause contains a term of the form "col=X", - ** where X is a constant value. The collation sequences of the - ** comparison and select-list expressions must match those of the index. - ** - ** 3. All of those index columns for which the WHERE clause does not - ** contain a "col=X" term are subject to a NOT NULL constraint. - */ - for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ - if( !IsUniqueIndex(pIdx) ) continue; - for(i=0; i<pIdx->nKeyCol; i++){ - i16 iCol = pIdx->aiColumn[i]; - if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ - int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i); - if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){ - break; - } - } - } - if( i==pIdx->nKeyCol ){ - /* This index implies that the DISTINCT qualifier is redundant. */ - return 1; - } - } - - return 0; -} - - -/* -** Estimate the logarithm of the input value to base 2. -*/ -static LogEst estLog(LogEst N){ - return N<=10 ? 0 : sqlite3LogEst(N) - 33; -} - -/* -** Two routines for printing the content of an sqlite3_index_info -** structure. Used for testing and debugging only. If neither -** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines -** are no-ops. -*/ -#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) -static void TRACE_IDX_INPUTS(sqlite3_index_info *p){ - int i; - if( !sqlite3WhereTrace ) return; - for(i=0; i<p->nConstraint; i++){ - sqlite3DebugPrintf(" constraint[%d]: col=%d termid=%d op=%d usabled=%d\n", - i, - p->aConstraint[i].iColumn, - p->aConstraint[i].iTermOffset, - p->aConstraint[i].op, - p->aConstraint[i].usable); - } - for(i=0; i<p->nOrderBy; i++){ - sqlite3DebugPrintf(" orderby[%d]: col=%d desc=%d\n", - i, - p->aOrderBy[i].iColumn, - p->aOrderBy[i].desc); - } -} -static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){ - int i; - if( !sqlite3WhereTrace ) return; - for(i=0; i<p->nConstraint; i++){ - sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", - i, - p->aConstraintUsage[i].argvIndex, - p->aConstraintUsage[i].omit); - } - sqlite3DebugPrintf(" idxNum=%d\n", p->idxNum); - sqlite3DebugPrintf(" idxStr=%s\n", p->idxStr); - sqlite3DebugPrintf(" orderByConsumed=%d\n", p->orderByConsumed); - sqlite3DebugPrintf(" estimatedCost=%g\n", p->estimatedCost); - sqlite3DebugPrintf(" estimatedRows=%lld\n", p->estimatedRows); -} -#else -#define TRACE_IDX_INPUTS(A) -#define TRACE_IDX_OUTPUTS(A) -#endif - -#ifndef SQLITE_OMIT_AUTOMATIC_INDEX -/* -** Return TRUE if the WHERE clause term pTerm is of a form where it -** could be used with an index to access pSrc, assuming an appropriate -** index existed. -*/ -static int termCanDriveIndex( - WhereTerm *pTerm, /* WHERE clause term to check */ - struct SrcList_item *pSrc, /* Table we are trying to access */ - Bitmask notReady /* Tables in outer loops of the join */ -){ - char aff; - if( pTerm->leftCursor!=pSrc->iCursor ) return 0; - if( (pTerm->eOperator & WO_EQ)==0 ) return 0; - if( (pTerm->prereqRight & notReady)!=0 ) return 0; - if( pTerm->u.leftColumn<0 ) return 0; - aff = pSrc->pTab->aCol[pTerm->u.leftColumn].affinity; - if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0; - return 1; -} -#endif - - -#ifndef SQLITE_OMIT_AUTOMATIC_INDEX -/* -** Generate code to construct the Index object for an automatic index -** and to set up the WhereLevel object pLevel so that the code generator -** makes use of the automatic index. -*/ -static void constructAutomaticIndex( - Parse *pParse, /* The parsing context */ - WhereClause *pWC, /* The WHERE clause */ - struct SrcList_item *pSrc, /* The FROM clause term to get the next index */ - Bitmask notReady, /* Mask of cursors that are not available */ - WhereLevel *pLevel /* Write new index here */ -){ - int nKeyCol; /* Number of columns in the constructed index */ - WhereTerm *pTerm; /* A single term of the WHERE clause */ - WhereTerm *pWCEnd; /* End of pWC->a[] */ - Index *pIdx; /* Object describing the transient index */ - Vdbe *v; /* Prepared statement under construction */ - int addrInit; /* Address of the initialization bypass jump */ - Table *pTable; /* The table being indexed */ - int addrTop; /* Top of the index fill loop */ - int regRecord; /* Register holding an index record */ - int n; /* Column counter */ - int i; /* Loop counter */ - int mxBitCol; /* Maximum column in pSrc->colUsed */ - CollSeq *pColl; /* Collating sequence to on a column */ - WhereLoop *pLoop; /* The Loop object */ - char *zNotUsed; /* Extra space on the end of pIdx */ - Bitmask idxCols; /* Bitmap of columns used for indexing */ - Bitmask extraCols; /* Bitmap of additional columns */ - u8 sentWarning = 0; /* True if a warnning has been issued */ - Expr *pPartial = 0; /* Partial Index Expression */ - int iContinue = 0; /* Jump here to skip excluded rows */ - - /* Generate code to skip over the creation and initialization of the - ** transient index on 2nd and subsequent iterations of the loop. */ - v = pParse->pVdbe; - assert( v!=0 ); - addrInit = sqlite3CodeOnce(pParse); VdbeCoverage(v); - - /* Count the number of columns that will be added to the index - ** and used to match WHERE clause constraints */ - nKeyCol = 0; - pTable = pSrc->pTab; - pWCEnd = &pWC->a[pWC->nTerm]; - pLoop = pLevel->pWLoop; - idxCols = 0; - for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){ - Expr *pExpr = pTerm->pExpr; - assert( !ExprHasProperty(pExpr, EP_FromJoin) /* prereq always non-zero */ - || pExpr->iRightJoinTable!=pSrc->iCursor /* for the right-hand */ - || pLoop->prereq!=0 ); /* table of a LEFT JOIN */ - if( pLoop->prereq==0 - && (pTerm->wtFlags & TERM_VIRTUAL)==0 - && !ExprHasProperty(pExpr, EP_FromJoin) - && sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor) ){ - pPartial = sqlite3ExprAnd(pParse->db, pPartial, - sqlite3ExprDup(pParse->db, pExpr, 0)); - } - if( termCanDriveIndex(pTerm, pSrc, notReady) ){ - int iCol = pTerm->u.leftColumn; - Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); - testcase( iCol==BMS ); - testcase( iCol==BMS-1 ); - if( !sentWarning ){ - sqlite3_log(SQLITE_WARNING_AUTOINDEX, - "automatic index on %s(%s)", pTable->zName, - pTable->aCol[iCol].zName); - sentWarning = 1; - } - if( (idxCols & cMask)==0 ){ - if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ - goto end_auto_index_create; - } - pLoop->aLTerm[nKeyCol++] = pTerm; - idxCols |= cMask; - } - } - } - assert( nKeyCol>0 ); - pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol; - pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED - | WHERE_AUTO_INDEX; - - /* Count the number of additional columns needed to create a - ** covering index. A "covering index" is an index that contains all - ** columns that are needed by the query. With a covering index, the - ** original table never needs to be accessed. Automatic indices must - ** be a covering index because the index will not be updated if the - ** original table changes and the index and table cannot both be used - ** if they go out of sync. - */ - extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); - mxBitCol = MIN(BMS-1,pTable->nCol); - testcase( pTable->nCol==BMS-1 ); - testcase( pTable->nCol==BMS-2 ); - for(i=0; i<mxBitCol; i++){ - if( extraCols & MASKBIT(i) ) nKeyCol++; - } - if( pSrc->colUsed & MASKBIT(BMS-1) ){ - nKeyCol += pTable->nCol - BMS + 1; - } - - /* Construct the Index object to describe this index */ - pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); - if( pIdx==0 ) goto end_auto_index_create; - pLoop->u.btree.pIndex = pIdx; - pIdx->zName = "auto-index"; - pIdx->pTable = pTable; - n = 0; - idxCols = 0; - for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){ - if( termCanDriveIndex(pTerm, pSrc, notReady) ){ - int iCol = pTerm->u.leftColumn; - Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); - testcase( iCol==BMS-1 ); - testcase( iCol==BMS ); - if( (idxCols & cMask)==0 ){ - Expr *pX = pTerm->pExpr; - idxCols |= cMask; - pIdx->aiColumn[n] = pTerm->u.leftColumn; - pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); - pIdx->azColl[n] = pColl ? pColl->zName : "BINARY"; - n++; - } - } - } - assert( (u32)n==pLoop->u.btree.nEq ); - - /* Add additional columns needed to make the automatic index into - ** a covering index */ - for(i=0; i<mxBitCol; i++){ - if( extraCols & MASKBIT(i) ){ - pIdx->aiColumn[n] = i; - pIdx->azColl[n] = "BINARY"; - n++; - } - } - if( pSrc->colUsed & MASKBIT(BMS-1) ){ - for(i=BMS-1; i<pTable->nCol; i++){ - pIdx->aiColumn[n] = i; - pIdx->azColl[n] = "BINARY"; - n++; - } - } - assert( n==nKeyCol ); - pIdx->aiColumn[n] = -1; - pIdx->azColl[n] = "BINARY"; - - /* Create the automatic index */ - assert( pLevel->iIdxCur>=0 ); - pLevel->iIdxCur = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); - sqlite3VdbeSetP4KeyInfo(pParse, pIdx); - VdbeComment((v, "for %s", pTable->zName)); - - /* Fill the automatic index with content */ - sqlite3ExprCachePush(pParse); - addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); - if( pPartial ){ - iContinue = sqlite3VdbeMakeLabel(v); - sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); - pLoop->wsFlags |= WHERE_PARTIALIDX; - } - regRecord = sqlite3GetTempReg(pParse); - sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0); - sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); - sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); - if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); - sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); - sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); - sqlite3VdbeJumpHere(v, addrTop); - sqlite3ReleaseTempReg(pParse, regRecord); - sqlite3ExprCachePop(pParse); - - /* Jump here when skipping the initialization */ - sqlite3VdbeJumpHere(v, addrInit); - -end_auto_index_create: - sqlite3ExprDelete(pParse->db, pPartial); -} -#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ - -#ifndef SQLITE_OMIT_VIRTUALTABLE -/* -** Allocate and populate an sqlite3_index_info structure. It is the -** responsibility of the caller to eventually release the structure -** by passing the pointer returned by this function to sqlite3_free(). -*/ -static sqlite3_index_info *allocateIndexInfo( - Parse *pParse, - WhereClause *pWC, - struct SrcList_item *pSrc, - ExprList *pOrderBy -){ - int i, j; - int nTerm; - struct sqlite3_index_constraint *pIdxCons; - struct sqlite3_index_orderby *pIdxOrderBy; - struct sqlite3_index_constraint_usage *pUsage; - WhereTerm *pTerm; - int nOrderBy; - sqlite3_index_info *pIdxInfo; - - /* Count the number of possible WHERE clause constraints referring - ** to this virtual table */ - for(i=nTerm=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ - if( pTerm->leftCursor != pSrc->iCursor ) continue; - assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); - testcase( pTerm->eOperator & WO_IN ); - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_ALL ); - if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue; - if( pTerm->wtFlags & TERM_VNULL ) continue; - nTerm++; - } - - /* If the ORDER BY clause contains only columns in the current - ** virtual table then allocate space for the aOrderBy part of - ** the sqlite3_index_info structure. - */ - nOrderBy = 0; - if( pOrderBy ){ - int n = pOrderBy->nExpr; - for(i=0; i<n; i++){ - Expr *pExpr = pOrderBy->a[i].pExpr; - if( pExpr->op!=TK_COLUMN || pExpr->iTable!=pSrc->iCursor ) break; - } - if( i==n){ - nOrderBy = n; - } - } - - /* Allocate the sqlite3_index_info structure - */ - pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo) - + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm - + sizeof(*pIdxOrderBy)*nOrderBy ); - if( pIdxInfo==0 ){ - sqlite3ErrorMsg(pParse, "out of memory"); - return 0; - } - - /* Initialize the structure. The sqlite3_index_info structure contains - ** many fields that are declared "const" to prevent xBestIndex from - ** changing them. We have to do some funky casting in order to - ** initialize those fields. - */ - pIdxCons = (struct sqlite3_index_constraint*)&pIdxInfo[1]; - pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm]; - pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy]; - *(int*)&pIdxInfo->nConstraint = nTerm; - *(int*)&pIdxInfo->nOrderBy = nOrderBy; - *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; - *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; - *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = - pUsage; - - for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ - u8 op; - if( pTerm->leftCursor != pSrc->iCursor ) continue; - assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); - testcase( pTerm->eOperator & WO_IN ); - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_ALL ); - if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV))==0 ) continue; - if( pTerm->wtFlags & TERM_VNULL ) continue; - pIdxCons[j].iColumn = pTerm->u.leftColumn; - pIdxCons[j].iTermOffset = i; - op = (u8)pTerm->eOperator & WO_ALL; - if( op==WO_IN ) op = WO_EQ; - pIdxCons[j].op = op; - /* The direct assignment in the previous line is possible only because - ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The - ** following asserts verify this fact. */ - assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); - assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); - assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); - assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); - assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); - assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); - assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); - j++; - } - for(i=0; i<nOrderBy; i++){ - Expr *pExpr = pOrderBy->a[i].pExpr; - pIdxOrderBy[i].iColumn = pExpr->iColumn; - pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; - } - - return pIdxInfo; -} - -/* -** The table object reference passed as the second argument to this function -** must represent a virtual table. This function invokes the xBestIndex() -** method of the virtual table with the sqlite3_index_info object that -** comes in as the 3rd argument to this function. -** -** If an error occurs, pParse is populated with an error message and a -** non-zero value is returned. Otherwise, 0 is returned and the output -** part of the sqlite3_index_info structure is left populated. -** -** Whether or not an error is returned, it is the responsibility of the -** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates -** that this is required. -*/ -static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ - sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; - int i; - int rc; - - TRACE_IDX_INPUTS(p); - rc = pVtab->pModule->xBestIndex(pVtab, p); - TRACE_IDX_OUTPUTS(p); - - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_NOMEM ){ - pParse->db->mallocFailed = 1; - }else if( !pVtab->zErrMsg ){ - sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); - }else{ - sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); - } - } - sqlite3_free(pVtab->zErrMsg); - pVtab->zErrMsg = 0; - - for(i=0; i<p->nConstraint; i++){ - if( !p->aConstraint[i].usable && p->aConstraintUsage[i].argvIndex>0 ){ - sqlite3ErrorMsg(pParse, - "table %s: xBestIndex returned an invalid plan", pTab->zName); - } - } - - return pParse->nErr; -} -#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ - -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 -/* -** Estimate the location of a particular key among all keys in an -** index. Store the results in aStat as follows: -** -** aStat[0] Est. number of rows less than pRec -** aStat[1] Est. number of rows equal to pRec -** -** Return the index of the sample that is the smallest sample that -** is greater than or equal to pRec. Note that this index is not an index -** into the aSample[] array - it is an index into a virtual set of samples -** based on the contents of aSample[] and the number of fields in record -** pRec. -*/ -static int whereKeyStats( - Parse *pParse, /* Database connection */ - Index *pIdx, /* Index to consider domain of */ - UnpackedRecord *pRec, /* Vector of values to consider */ - int roundUp, /* Round up if true. Round down if false */ - tRowcnt *aStat /* OUT: stats written here */ -){ - IndexSample *aSample = pIdx->aSample; - int iCol; /* Index of required stats in anEq[] etc. */ - int i; /* Index of first sample >= pRec */ - int iSample; /* Smallest sample larger than or equal to pRec */ - int iMin = 0; /* Smallest sample not yet tested */ - int iTest; /* Next sample to test */ - int res; /* Result of comparison operation */ - int nField; /* Number of fields in pRec */ - tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ - -#ifndef SQLITE_DEBUG - UNUSED_PARAMETER( pParse ); -#endif - assert( pRec!=0 ); - assert( pIdx->nSample>0 ); - assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); - - /* Do a binary search to find the first sample greater than or equal - ** to pRec. If pRec contains a single field, the set of samples to search - ** is simply the aSample[] array. If the samples in aSample[] contain more - ** than one fields, all fields following the first are ignored. - ** - ** If pRec contains N fields, where N is more than one, then as well as the - ** samples in aSample[] (truncated to N fields), the search also has to - ** consider prefixes of those samples. For example, if the set of samples - ** in aSample is: - ** - ** aSample[0] = (a, 5) - ** aSample[1] = (a, 10) - ** aSample[2] = (b, 5) - ** aSample[3] = (c, 100) - ** aSample[4] = (c, 105) - ** - ** Then the search space should ideally be the samples above and the - ** unique prefixes [a], [b] and [c]. But since that is hard to organize, - ** the code actually searches this set: - ** - ** 0: (a) - ** 1: (a, 5) - ** 2: (a, 10) - ** 3: (a, 10) - ** 4: (b) - ** 5: (b, 5) - ** 6: (c) - ** 7: (c, 100) - ** 8: (c, 105) - ** 9: (c, 105) - ** - ** For each sample in the aSample[] array, N samples are present in the - ** effective sample array. In the above, samples 0 and 1 are based on - ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. - ** - ** Often, sample i of each block of N effective samples has (i+1) fields. - ** Except, each sample may be extended to ensure that it is greater than or - ** equal to the previous sample in the array. For example, in the above, - ** sample 2 is the first sample of a block of N samples, so at first it - ** appears that it should be 1 field in size. However, that would make it - ** smaller than sample 1, so the binary search would not work. As a result, - ** it is extended to two fields. The duplicates that this creates do not - ** cause any problems. - */ - nField = pRec->nField; - iCol = 0; - iSample = pIdx->nSample * nField; - do{ - int iSamp; /* Index in aSample[] of test sample */ - int n; /* Number of fields in test sample */ - - iTest = (iMin+iSample)/2; - iSamp = iTest / nField; - if( iSamp>0 ){ - /* The proposed effective sample is a prefix of sample aSample[iSamp]. - ** Specifically, the shortest prefix of at least (1 + iTest%nField) - ** fields that is greater than the previous effective sample. */ - for(n=(iTest % nField) + 1; n<nField; n++){ - if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break; - } - }else{ - n = iTest + 1; - } - - pRec->nField = n; - res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); - if( res<0 ){ - iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; - iMin = iTest+1; - }else if( res==0 && n<nField ){ - iLower = aSample[iSamp].anLt[n-1]; - iMin = iTest+1; - res = -1; - }else{ - iSample = iTest; - iCol = n-1; - } - }while( res && iMin<iSample ); - i = iSample / nField; - -#ifdef SQLITE_DEBUG - /* The following assert statements check that the binary search code - ** above found the right answer. This block serves no purpose other - ** than to invoke the asserts. */ - if( pParse->db->mallocFailed==0 ){ - if( res==0 ){ - /* If (res==0) is true, then pRec must be equal to sample i. */ - assert( i<pIdx->nSample ); - assert( iCol==nField-1 ); - pRec->nField = nField; - assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) - || pParse->db->mallocFailed - ); - }else{ - /* Unless i==pIdx->nSample, indicating that pRec is larger than - ** all samples in the aSample[] array, pRec must be smaller than the - ** (iCol+1) field prefix of sample i. */ - assert( i<=pIdx->nSample && i>=0 ); - pRec->nField = iCol+1; - assert( i==pIdx->nSample - || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 - || pParse->db->mallocFailed ); - - /* if i==0 and iCol==0, then record pRec is smaller than all samples - ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must - ** be greater than or equal to the (iCol) field prefix of sample i. - ** If (i>0), then pRec must also be greater than sample (i-1). */ - if( iCol>0 ){ - pRec->nField = iCol; - assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 - || pParse->db->mallocFailed ); - } - if( i>0 ){ - pRec->nField = nField; - assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 - || pParse->db->mallocFailed ); - } - } - } -#endif /* ifdef SQLITE_DEBUG */ - - if( res==0 ){ - /* Record pRec is equal to sample i */ - assert( iCol==nField-1 ); - aStat[0] = aSample[i].anLt[iCol]; - aStat[1] = aSample[i].anEq[iCol]; - }else{ - /* At this point, the (iCol+1) field prefix of aSample[i] is the first - ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec - ** is larger than all samples in the array. */ - tRowcnt iUpper, iGap; - if( i>=pIdx->nSample ){ - iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); - }else{ - iUpper = aSample[i].anLt[iCol]; - } - - if( iLower>=iUpper ){ - iGap = 0; - }else{ - iGap = iUpper - iLower; - } - if( roundUp ){ - iGap = (iGap*2)/3; - }else{ - iGap = iGap/3; - } - aStat[0] = iLower + iGap; - aStat[1] = pIdx->aAvgEq[iCol]; - } - - /* Restore the pRec->nField value before returning. */ - pRec->nField = nField; - return i; -} -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - -/* -** If it is not NULL, pTerm is a term that provides an upper or lower -** bound on a range scan. Without considering pTerm, it is estimated -** that the scan will visit nNew rows. This function returns the number -** estimated to be visited after taking pTerm into account. -** -** If the user explicitly specified a likelihood() value for this term, -** then the return value is the likelihood multiplied by the number of -** input rows. Otherwise, this function assumes that an "IS NOT NULL" term -** has a likelihood of 0.50, and any other term a likelihood of 0.25. -*/ -static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ - LogEst nRet = nNew; - if( pTerm ){ - if( pTerm->truthProb<=0 ){ - nRet += pTerm->truthProb; - }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ - nRet -= 20; assert( 20==sqlite3LogEst(4) ); - } - } - return nRet; -} - -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 -/* -** This function is called to estimate the number of rows visited by a -** range-scan on a skip-scan index. For example: -** -** CREATE INDEX i1 ON t1(a, b, c); -** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; -** -** Value pLoop->nOut is currently set to the estimated number of rows -** visited for scanning (a=? AND b=?). This function reduces that estimate -** by some factor to account for the (c BETWEEN ? AND ?) expression based -** on the stat4 data for the index. this scan will be peformed multiple -** times (once for each (a,b) combination that matches a=?) is dealt with -** by the caller. -** -** It does this by scanning through all stat4 samples, comparing values -** extracted from pLower and pUpper with the corresponding column in each -** sample. If L and U are the number of samples found to be less than or -** equal to the values extracted from pLower and pUpper respectively, and -** N is the total number of samples, the pLoop->nOut value is adjusted -** as follows: -** -** nOut = nOut * ( min(U - L, 1) / N ) -** -** If pLower is NULL, or a value cannot be extracted from the term, L is -** set to zero. If pUpper is NULL, or a value cannot be extracted from it, -** U is set to N. -** -** Normally, this function sets *pbDone to 1 before returning. However, -** if no value can be extracted from either pLower or pUpper (and so the -** estimate of the number of rows delivered remains unchanged), *pbDone -** is left as is. -** -** If an error occurs, an SQLite error code is returned. Otherwise, -** SQLITE_OK. -*/ -static int whereRangeSkipScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ - WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereLoop *pLoop, /* Update the .nOut value of this loop */ - int *pbDone /* Set to true if at least one expr. value extracted */ -){ - Index *p = pLoop->u.btree.pIndex; - int nEq = pLoop->u.btree.nEq; - sqlite3 *db = pParse->db; - int nLower = -1; - int nUpper = p->nSample+1; - int rc = SQLITE_OK; - int iCol = p->aiColumn[nEq]; - u8 aff = iCol>=0 ? p->pTable->aCol[iCol].affinity : SQLITE_AFF_INTEGER; - CollSeq *pColl; - - sqlite3_value *p1 = 0; /* Value extracted from pLower */ - sqlite3_value *p2 = 0; /* Value extracted from pUpper */ - sqlite3_value *pVal = 0; /* Value extracted from record */ - - pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); - if( pLower ){ - rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); - nLower = 0; - } - if( pUpper && rc==SQLITE_OK ){ - rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); - nUpper = p2 ? 0 : p->nSample; - } - - if( p1 || p2 ){ - int i; - int nDiff; - for(i=0; rc==SQLITE_OK && i<p->nSample; i++){ - rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); - if( rc==SQLITE_OK && p1 ){ - int res = sqlite3MemCompare(p1, pVal, pColl); - if( res>=0 ) nLower++; - } - if( rc==SQLITE_OK && p2 ){ - int res = sqlite3MemCompare(p2, pVal, pColl); - if( res>=0 ) nUpper++; - } - } - nDiff = (nUpper - nLower); - if( nDiff<=0 ) nDiff = 1; + && pExpr->pLeft->iColumn>=0 + && OptimizationEnabled(db, SQLITE_Stat34) + ){ + Expr *pNewExpr; + Expr *pLeft = pExpr->pLeft; + int idxNew; + WhereTerm *pNewTerm; - /* If there is both an upper and lower bound specified, and the - ** comparisons indicate that they are close together, use the fallback - ** method (assume that the scan visits 1/64 of the rows) for estimating - ** the number of rows visited. Otherwise, estimate the number of rows - ** using the method described in the header comment for this function. */ - if( nDiff!=1 || pUpper==0 || pLower==0 ){ - int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); - pLoop->nOut -= nAdjust; - *pbDone = 1; - WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", - nLower, nUpper, nAdjust*-1, pLoop->nOut)); - } + pNewExpr = sqlite3PExpr(pParse, TK_GT, + sqlite3ExprDup(db, pLeft, 0), + sqlite3PExpr(pParse, TK_NULL, 0, 0, 0), 0); - }else{ - assert( *pbDone==0 ); + idxNew = whereClauseInsert(pWC, pNewExpr, + TERM_VIRTUAL|TERM_DYNAMIC|TERM_VNULL); + if( idxNew ){ + pNewTerm = &pWC->a[idxNew]; + pNewTerm->prereqRight = 0; + pNewTerm->leftCursor = pLeft->iTable; + pNewTerm->u.leftColumn = pLeft->iColumn; + pNewTerm->eOperator = WO_GT; + markTermAsChild(pWC, idxNew, idxTerm); + pTerm = &pWC->a[idxTerm]; + pTerm->wtFlags |= TERM_COPIED; + pNewTerm->prereqAll = pTerm->prereqAll; + } } +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - sqlite3ValueFree(p1); - sqlite3ValueFree(p2); - sqlite3ValueFree(pVal); - - return rc; + /* Prevent ON clause terms of a LEFT JOIN from being used to drive + ** an index for tables to the left of the join. + */ + pTerm->prereqRight |= extraRight; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + +/*************************************************************************** +** Routines with file scope above. Interface to the rest of the where.c +** subsystem follows. +***************************************************************************/ /* -** This function is used to estimate the number of rows that will be visited -** by scanning an index for a range of values. The range may have an upper -** bound, a lower bound, or both. The WHERE clause terms that set the upper -** and lower bounds are represented by pLower and pUpper respectively. For -** example, assuming that index p is on t1(a): -** -** ... FROM t1 WHERE a > ? AND a < ? ... -** |_____| |_____| -** | | -** pLower pUpper -** -** If either of the upper or lower bound is not present, then NULL is passed in -** place of the corresponding WhereTerm. -** -** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index -** column subject to the range constraint. Or, equivalently, the number of -** equality constraints optimized by the proposed index scan. For example, -** assuming index p is on t1(a, b), and the SQL query is: -** -** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... -** -** then nEq is set to 1 (as the range restricted column, b, is the second -** left-most column of the index). Or, if the query is: +** This routine identifies subexpressions in the WHERE clause where +** each subexpression is separated by the AND operator or some other +** operator specified in the op parameter. The WhereClause structure +** is filled with pointers to subexpressions. For example: ** -** ... FROM t1 WHERE a > ? AND a < ? ... +** WHERE a=='hello' AND coalesce(b,11)<10 AND (c+12!=d OR c==22) +** \________/ \_______________/ \________________/ +** slot[0] slot[1] slot[2] ** -** then nEq is set to 0. +** The original WHERE clause in pExpr is unaltered. All this routine +** does is make slot[] entries point to substructure within pExpr. ** -** When this function is called, *pnOut is set to the sqlite3LogEst() of the -** number of rows that the index scan is expected to visit without -** considering the range constraints. If nEq is 0, then *pnOut is the number of -** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) -** to account for the range constraints pLower and pUpper. -** -** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be -** used, a single range inequality reduces the search space by a factor of 4. -** and a pair of constraints (x>? AND x<?) reduces the expected number of -** rows visited by a factor of 64. +** In the previous sentence and in the diagram, "slot[]" refers to +** the WhereClause.a[] array. The slot[] array grows as needed to contain +** all terms of the WHERE clause. */ -static int whereRangeScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereLoopBuilder *pBuilder, - WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ - WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ - WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ -){ - int rc = SQLITE_OK; - int nOut = pLoop->nOut; - LogEst nNew; - -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 - Index *p = pLoop->u.btree.pIndex; - int nEq = pLoop->u.btree.nEq; - - if( p->nSample>0 && nEq<p->nSampleCol ){ - if( nEq==pBuilder->nRecValid ){ - UnpackedRecord *pRec = pBuilder->pRec; - tRowcnt a[2]; - u8 aff; - - /* Variable iLower will be set to the estimate of the number of rows in - ** the index that are less than the lower bound of the range query. The - ** lower bound being the concatenation of $P and $L, where $P is the - ** key-prefix formed by the nEq values matched against the nEq left-most - ** columns of the index, and $L is the value in pLower. - ** - ** Or, if pLower is NULL or $L cannot be extracted from it (because it - ** is not a simple variable or literal value), the lower bound of the - ** range is $P. Due to a quirk in the way whereKeyStats() works, even - ** if $L is available, whereKeyStats() is called for both ($P) and - ** ($P:$L) and the larger of the two returned values is used. - ** - ** Similarly, iUpper is to be set to the estimate of the number of rows - ** less than the upper bound of the range query. Where the upper bound - ** is either ($P) or ($P:$U). Again, even if $U is available, both values - ** of iUpper are requested of whereKeyStats() and the smaller used. - ** - ** The number of rows between the two bounds is then just iUpper-iLower. - */ - tRowcnt iLower; /* Rows less than the lower bound */ - tRowcnt iUpper; /* Rows less than the upper bound */ - int iLwrIdx = -2; /* aSample[] for the lower bound */ - int iUprIdx = -1; /* aSample[] for the upper bound */ - - if( pRec ){ - testcase( pRec->nField!=pBuilder->nRecValid ); - pRec->nField = pBuilder->nRecValid; - } - if( nEq==p->nKeyCol ){ - aff = SQLITE_AFF_INTEGER; - }else{ - aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; - } - /* Determine iLower and iUpper using ($P) only. */ - if( nEq==0 ){ - iLower = 0; - iUpper = p->nRowEst0; - }else{ - /* Note: this call could be optimized away - since the same values must - ** have been requested when testing key $P in whereEqualScanEst(). */ - whereKeyStats(pParse, p, pRec, 0, a); - iLower = a[0]; - iUpper = a[0] + a[1]; - } +SQLITE_PRIVATE void sqlite3WhereSplit(WhereClause *pWC, Expr *pExpr, u8 op){ + Expr *pE2 = sqlite3ExprSkipCollate(pExpr); + pWC->op = op; + if( pE2==0 ) return; + if( pE2->op!=op ){ + whereClauseInsert(pWC, pExpr, 0); + }else{ + sqlite3WhereSplit(pWC, pE2->pLeft, op); + sqlite3WhereSplit(pWC, pE2->pRight, op); + } +} - assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 ); - assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); - assert( p->aSortOrder!=0 ); - if( p->aSortOrder[nEq] ){ - /* The roles of pLower and pUpper are swapped for a DESC index */ - SWAP(WhereTerm*, pLower, pUpper); - } +/* +** Initialize a preallocated WhereClause structure. +*/ +SQLITE_PRIVATE void sqlite3WhereClauseInit( + WhereClause *pWC, /* The WhereClause to be initialized */ + WhereInfo *pWInfo /* The WHERE processing context */ +){ + pWC->pWInfo = pWInfo; + pWC->pOuter = 0; + pWC->nTerm = 0; + pWC->nSlot = ArraySize(pWC->aStatic); + pWC->a = pWC->aStatic; +} - /* If possible, improve on the iLower estimate using ($P:$L). */ - if( pLower ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pLower->pExpr->pRight; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; - iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); - iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); - if( iNew>iLower ) iLower = iNew; - nOut--; - pLower = 0; - } - } +/* +** Deallocate a WhereClause structure. The WhereClause structure +** itself is not freed. This routine is the inverse of sqlite3WhereClauseInit(). +*/ +SQLITE_PRIVATE void sqlite3WhereClauseClear(WhereClause *pWC){ + int i; + WhereTerm *a; + sqlite3 *db = pWC->pWInfo->pParse->db; + for(i=pWC->nTerm-1, a=pWC->a; i>=0; i--, a++){ + if( a->wtFlags & TERM_DYNAMIC ){ + sqlite3ExprDelete(db, a->pExpr); + } + if( a->wtFlags & TERM_ORINFO ){ + whereOrInfoDelete(db, a->u.pOrInfo); + }else if( a->wtFlags & TERM_ANDINFO ){ + whereAndInfoDelete(db, a->u.pAndInfo); + } + } + if( pWC->a!=pWC->aStatic ){ + sqlite3DbFree(db, pWC->a); + } +} - /* If possible, improve on the iUpper estimate using ($P:$U). */ - if( pUpper ){ - int bOk; /* True if value is extracted from pExpr */ - Expr *pExpr = pUpper->pExpr->pRight; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); - if( rc==SQLITE_OK && bOk ){ - tRowcnt iNew; - iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); - iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); - if( iNew<iUpper ) iUpper = iNew; - nOut--; - pUpper = 0; - } - } - pBuilder->pRec = pRec; - if( rc==SQLITE_OK ){ - if( iUpper>iLower ){ - nNew = sqlite3LogEst(iUpper - iLower); - /* TUNING: If both iUpper and iLower are derived from the same - ** sample, then assume they are 4x more selective. This brings - ** the estimated selectivity more in line with what it would be - ** if estimated without the use of STAT3/4 tables. */ - if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); - }else{ - nNew = 10; assert( 10==sqlite3LogEst(2) ); - } - if( nNew<nOut ){ - nOut = nNew; - } - WHERETRACE(0x10, ("STAT4 range scan: %u..%u est=%d\n", - (u32)iLower, (u32)iUpper, nOut)); - } - }else{ - int bDone = 0; - rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); - if( bDone ) return rc; +/* +** These routines walk (recursively) an expression tree and generate +** a bitmask indicating which tables are used in that expression +** tree. +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereExprUsage(WhereMaskSet *pMaskSet, Expr *p){ + Bitmask mask = 0; + if( p==0 ) return 0; + if( p->op==TK_COLUMN ){ + mask = sqlite3WhereGetMask(pMaskSet, p->iTable); + return mask; + } + mask = sqlite3WhereExprUsage(pMaskSet, p->pRight); + mask |= sqlite3WhereExprUsage(pMaskSet, p->pLeft); + if( ExprHasProperty(p, EP_xIsSelect) ){ + mask |= exprSelectUsage(pMaskSet, p->x.pSelect); + }else{ + mask |= sqlite3WhereExprListUsage(pMaskSet, p->x.pList); + } + return mask; +} +SQLITE_PRIVATE Bitmask sqlite3WhereExprListUsage(WhereMaskSet *pMaskSet, ExprList *pList){ + int i; + Bitmask mask = 0; + if( pList ){ + for(i=0; i<pList->nExpr; i++){ + mask |= sqlite3WhereExprUsage(pMaskSet, pList->a[i].pExpr); } } -#else - UNUSED_PARAMETER(pParse); - UNUSED_PARAMETER(pBuilder); - assert( pLower || pUpper ); -#endif - assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 ); - nNew = whereRangeAdjust(pLower, nOut); - nNew = whereRangeAdjust(pUpper, nNew); + return mask; +} - /* TUNING: If there is both an upper and lower limit and neither limit - ** has an application-defined likelihood(), assume the range is - ** reduced by an additional 75%. This means that, by default, an open-ended - ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the - ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to - ** match 1/64 of the index. */ - if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ - nNew -= 20; - } - nOut -= (pLower!=0) + (pUpper!=0); - if( nNew<10 ) nNew = 10; - if( nNew<nOut ) nOut = nNew; -#if defined(WHERETRACE_ENABLED) - if( pLoop->nOut>nOut ){ - WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", - pLoop->nOut, nOut)); +/* +** Call exprAnalyze on all terms in a WHERE clause. +** +** Note that exprAnalyze() might add new virtual terms onto the +** end of the WHERE clause. We do not want to analyze these new +** virtual terms, so start analyzing at the end and work forward +** so that the added virtual terms are never processed. +*/ +SQLITE_PRIVATE void sqlite3WhereExprAnalyze( + SrcList *pTabList, /* the FROM clause */ + WhereClause *pWC /* the WHERE clause to be analyzed */ +){ + int i; + for(i=pWC->nTerm-1; i>=0; i--){ + exprAnalyze(pTabList, pWC, i); } -#endif - pLoop->nOut = (LogEst)nOut; - return rc; } -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/************** End of whereexpr.c *******************************************/ +/************** Begin file where.c *******************************************/ /* -** Estimate the number of rows that will be returned based on -** an equality constraint x=VALUE and where that VALUE occurs in -** the histogram data. This only works when x is the left-most -** column of an index and sqlite_stat3 histogram data is available -** for that index. When pExpr==NULL that means the constraint is -** "x IS NULL" instead of "x=VALUE". +** 2001 September 15 ** -** Write the estimated row count into *pnRow and return SQLITE_OK. -** If unable to make an estimate, leave *pnRow unchanged and return -** non-zero. +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** This routine can fail if it is unable to load a collating sequence -** required for string comparison, or if unable to allocate memory -** for a UTF conversion required for comparison. The error is stored -** in the pParse structure. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This module contains C code that generates VDBE code used to process +** the WHERE clause of SQL statements. This module is responsible for +** generating the code that loops through a table looking for applicable +** rows. Indices are selected and used to speed the search when doing +** so is applicable. Because this module is responsible for selecting +** indices, you might also think of this module as the "query optimizer". */ -static int whereEqualScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereLoopBuilder *pBuilder, - Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ - tRowcnt *pnRow /* Write the revised row estimate here */ -){ - Index *p = pBuilder->pNew->u.btree.pIndex; - int nEq = pBuilder->pNew->u.btree.nEq; - UnpackedRecord *pRec = pBuilder->pRec; - u8 aff; /* Column affinity */ - int rc; /* Subfunction return code */ - tRowcnt a[2]; /* Statistics */ - int bOk; +/* #include "sqliteInt.h" */ +/* #include "whereInt.h" */ - assert( nEq>=1 ); - assert( nEq<=p->nColumn ); - assert( p->aSample!=0 ); - assert( p->nSample>0 ); - assert( pBuilder->nRecValid<nEq ); +/* Forward declaration of methods */ +static int whereLoopResize(sqlite3*, WhereLoop*, int); - /* If values are not available for all fields of the index to the left - ** of this one, no estimate can be made. Return SQLITE_NOTFOUND. */ - if( pBuilder->nRecValid<(nEq-1) ){ - return SQLITE_NOTFOUND; - } +/* Test variable that can be set to enable WHERE tracing */ +#if defined(SQLITE_TEST) || defined(SQLITE_DEBUG) +/***/ int sqlite3WhereTrace = 0; +#endif - /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue() - ** below would return the same value. */ - if( nEq>=p->nColumn ){ - *pnRow = 1; - return SQLITE_OK; - } - aff = p->pTable->aCol[p->aiColumn[nEq-1]].affinity; - rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq-1, &bOk); - pBuilder->pRec = pRec; - if( rc!=SQLITE_OK ) return rc; - if( bOk==0 ) return SQLITE_NOTFOUND; - pBuilder->nRecValid = nEq; +/* +** Return the estimated number of output rows from a WHERE clause +*/ +SQLITE_PRIVATE u64 sqlite3WhereOutputRowCount(WhereInfo *pWInfo){ + return sqlite3LogEstToInt(pWInfo->nRowOut); +} - whereKeyStats(pParse, p, pRec, 0, a); - WHERETRACE(0x10,("equality scan regions: %d\n", (int)a[1])); - *pnRow = a[1]; - - return rc; +/* +** Return one of the WHERE_DISTINCT_xxxxx values to indicate how this +** WHERE clause returns outputs for DISTINCT processing. +*/ +SQLITE_PRIVATE int sqlite3WhereIsDistinct(WhereInfo *pWInfo){ + return pWInfo->eDistinct; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ -#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 /* -** Estimate the number of rows that will be returned based on -** an IN constraint where the right-hand side of the IN operator -** is a list of values. Example: -** -** WHERE x IN (1,2,3,4) -** -** Write the estimated row count into *pnRow and return SQLITE_OK. -** If unable to make an estimate, leave *pnRow unchanged and return -** non-zero. -** -** This routine can fail if it is unable to load a collating sequence -** required for string comparison, or if unable to allocate memory -** for a UTF conversion required for comparison. The error is stored -** in the pParse structure. +** Return TRUE if the WHERE clause returns rows in ORDER BY order. +** Return FALSE if the output needs to be sorted. */ -static int whereInScanEst( - Parse *pParse, /* Parsing & code generating context */ - WhereLoopBuilder *pBuilder, - ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ - tRowcnt *pnRow /* Write the revised row estimate here */ -){ - Index *p = pBuilder->pNew->u.btree.pIndex; - i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); - int nRecValid = pBuilder->nRecValid; - int rc = SQLITE_OK; /* Subfunction return code */ - tRowcnt nEst; /* Number of rows for a single term */ - tRowcnt nRowEst = 0; /* New estimate of the number of rows */ - int i; /* Loop counter */ +SQLITE_PRIVATE int sqlite3WhereIsOrdered(WhereInfo *pWInfo){ + return pWInfo->nOBSat; +} - assert( p->aSample!=0 ); - for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){ - nEst = nRow0; - rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); - nRowEst += nEst; - pBuilder->nRecValid = nRecValid; - } +/* +** Return the VDBE address or label to jump to in order to continue +** immediately with the next row of a WHERE clause. +*/ +SQLITE_PRIVATE int sqlite3WhereContinueLabel(WhereInfo *pWInfo){ + assert( pWInfo->iContinue!=0 ); + return pWInfo->iContinue; +} - if( rc==SQLITE_OK ){ - if( nRowEst > nRow0 ) nRowEst = nRow0; - *pnRow = nRowEst; - WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); - } - assert( pBuilder->nRecValid==nRecValid ); - return rc; +/* +** Return the VDBE address or label to jump to in order to break +** out of a WHERE loop. +*/ +SQLITE_PRIVATE int sqlite3WhereBreakLabel(WhereInfo *pWInfo){ + return pWInfo->iBreak; } -#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ /* -** Disable a term in the WHERE clause. Except, do not disable the term -** if it controls a LEFT OUTER JOIN and it did not originate in the ON -** or USING clause of that join. -** -** Consider the term t2.z='ok' in the following queries: -** -** (1) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x WHERE t2.z='ok' -** (2) SELECT * FROM t1 LEFT JOIN t2 ON t1.a=t2.x AND t2.z='ok' -** (3) SELECT * FROM t1, t2 WHERE t1.a=t2.x AND t2.z='ok' -** -** The t2.z='ok' is disabled in the in (2) because it originates -** in the ON clause. The term is disabled in (3) because it is not part -** of a LEFT OUTER JOIN. In (1), the term is not disabled. -** -** Disabling a term causes that term to not be tested in the inner loop -** of the join. Disabling is an optimization. When terms are satisfied -** by indices, we disable them to prevent redundant tests in the inner -** loop. We would get the correct results if nothing were ever disabled, -** but joins might run a little slower. The trick is to disable as much -** as we can without disabling too much. If we disabled in (1), we'd get -** the wrong answer. See ticket #813. -** -** If all the children of a term are disabled, then that term is also -** automatically disabled. In this way, terms get disabled if derived -** virtual terms are tested first. For example: -** -** x GLOB 'abc*' AND x>='abc' AND x<'acd' -** \___________/ \______/ \_____/ -** parent child1 child2 +** Return TRUE if an UPDATE or DELETE statement can operate directly on +** the rowids returned by a WHERE clause. Return FALSE if doing an +** UPDATE or DELETE might change subsequent WHERE clause results. ** -** Only the parent term was in the original WHERE clause. The child1 -** and child2 terms were added by the LIKE optimization. If both of -** the virtual child terms are valid, then testing of the parent can be -** skipped. +** If the ONEPASS optimization is used (if this routine returns true) +** then also write the indices of open cursors used by ONEPASS +** into aiCur[0] and aiCur[1]. iaCur[0] gets the cursor of the data +** table and iaCur[1] gets the cursor used by an auxiliary index. +** Either value may be -1, indicating that cursor is not used. +** Any cursors returned will have been opened for writing. ** -** Usually the parent term is marked as TERM_CODED. But if the parent -** term was originally TERM_LIKE, then the parent gets TERM_LIKECOND instead. -** The TERM_LIKECOND marking indicates that the term should be coded inside -** a conditional such that is only evaluated on the second pass of a -** LIKE-optimization loop, when scanning BLOBs instead of strings. +** aiCur[0] and aiCur[1] both get -1 if the where-clause logic is +** unable to use the ONEPASS optimization. */ -static void disableTerm(WhereLevel *pLevel, WhereTerm *pTerm){ - int nLoop = 0; - while( pTerm - && (pTerm->wtFlags & TERM_CODED)==0 - && (pLevel->iLeftJoin==0 || ExprHasProperty(pTerm->pExpr, EP_FromJoin)) - && (pLevel->notReady & pTerm->prereqAll)==0 - ){ - if( nLoop && (pTerm->wtFlags & TERM_LIKE)!=0 ){ - pTerm->wtFlags |= TERM_LIKECOND; - }else{ - pTerm->wtFlags |= TERM_CODED; - } - if( pTerm->iParent<0 ) break; - pTerm = &pTerm->pWC->a[pTerm->iParent]; - pTerm->nChild--; - if( pTerm->nChild!=0 ) break; - nLoop++; - } +SQLITE_PRIVATE int sqlite3WhereOkOnePass(WhereInfo *pWInfo, int *aiCur){ + memcpy(aiCur, pWInfo->aiCurOnePass, sizeof(int)*2); + return pWInfo->okOnePass; } /* -** Code an OP_Affinity opcode to apply the column affinity string zAff -** to the n registers starting at base. -** -** As an optimization, SQLITE_AFF_NONE entries (which are no-ops) at the -** beginning and end of zAff are ignored. If all entries in zAff are -** SQLITE_AFF_NONE, then no code gets generated. -** -** This routine makes its own copy of zAff so that the caller is free -** to modify zAff after this routine returns. +** Move the content of pSrc into pDest */ -static void codeApplyAffinity(Parse *pParse, int base, int n, char *zAff){ - Vdbe *v = pParse->pVdbe; - if( zAff==0 ){ - assert( pParse->db->mallocFailed ); - return; - } - assert( v!=0 ); +static void whereOrMove(WhereOrSet *pDest, WhereOrSet *pSrc){ + pDest->n = pSrc->n; + memcpy(pDest->a, pSrc->a, pDest->n*sizeof(pDest->a[0])); +} - /* Adjust base and n to skip over SQLITE_AFF_NONE entries at the beginning - ** and end of the affinity string. - */ - while( n>0 && zAff[0]==SQLITE_AFF_NONE ){ - n--; - base++; - zAff++; +/* +** Try to insert a new prerequisite/cost entry into the WhereOrSet pSet. +** +** The new entry might overwrite an existing entry, or it might be +** appended, or it might be discarded. Do whatever is the right thing +** so that pSet keeps the N_OR_COST best entries seen so far. +*/ +static int whereOrInsert( + WhereOrSet *pSet, /* The WhereOrSet to be updated */ + Bitmask prereq, /* Prerequisites of the new entry */ + LogEst rRun, /* Run-cost of the new entry */ + LogEst nOut /* Number of outputs for the new entry */ +){ + u16 i; + WhereOrCost *p; + for(i=pSet->n, p=pSet->a; i>0; i--, p++){ + if( rRun<=p->rRun && (prereq & p->prereq)==prereq ){ + goto whereOrInsert_done; + } + if( p->rRun<=rRun && (p->prereq & prereq)==p->prereq ){ + return 0; + } } - while( n>1 && zAff[n-1]==SQLITE_AFF_NONE ){ - n--; + if( pSet->n<N_OR_COST ){ + p = &pSet->a[pSet->n++]; + p->nOut = nOut; + }else{ + p = pSet->a; + for(i=1; i<pSet->n; i++){ + if( p->rRun>pSet->a[i].rRun ) p = pSet->a + i; + } + if( p->rRun<=rRun ) return 0; } +whereOrInsert_done: + p->prereq = prereq; + p->rRun = rRun; + if( p->nOut>nOut ) p->nOut = nOut; + return 1; +} - /* Code the OP_Affinity opcode if there is anything left to do. */ - if( n>0 ){ - sqlite3VdbeAddOp2(v, OP_Affinity, base, n); - sqlite3VdbeChangeP4(v, -1, zAff, n); - sqlite3ExprCacheAffinityChange(pParse, base, n); +/* +** Return the bitmask for the given cursor number. Return 0 if +** iCursor is not in the set. +*/ +SQLITE_PRIVATE Bitmask sqlite3WhereGetMask(WhereMaskSet *pMaskSet, int iCursor){ + int i; + assert( pMaskSet->n<=(int)sizeof(Bitmask)*8 ); + for(i=0; i<pMaskSet->n; i++){ + if( pMaskSet->ix[i]==iCursor ){ + return MASKBIT(i); + } } + return 0; } - /* -** Generate code for a single equality term of the WHERE clause. An equality -** term can be either X=expr or X IN (...). pTerm is the term to be -** coded. -** -** The current value for the constraint is left in register iReg. +** Create a new mask for cursor iCursor. ** -** For a constraint of the form X=expr, the expression is evaluated and its -** result is left on the stack. For constraints of the form X IN (...) -** this routine sets up a loop that will iterate over all values of X. +** There is one cursor per table in the FROM clause. The number of +** tables in the FROM clause is limited by a test early in the +** sqlite3WhereBegin() routine. So we know that the pMaskSet->ix[] +** array will never overflow. */ -static int codeEqualityTerm( - Parse *pParse, /* The parsing context */ - WhereTerm *pTerm, /* The term of the WHERE clause to be coded */ - WhereLevel *pLevel, /* The level of the FROM clause we are working on */ - int iEq, /* Index of the equality term within this level */ - int bRev, /* True for reverse-order IN operations */ - int iTarget /* Attempt to leave results in this register */ -){ - Expr *pX = pTerm->pExpr; - Vdbe *v = pParse->pVdbe; - int iReg; /* Register holding results */ +static void createMask(WhereMaskSet *pMaskSet, int iCursor){ + assert( pMaskSet->n < ArraySize(pMaskSet->ix) ); + pMaskSet->ix[pMaskSet->n++] = iCursor; +} - assert( iTarget>0 ); - if( pX->op==TK_EQ ){ - iReg = sqlite3ExprCodeTarget(pParse, pX->pRight, iTarget); - }else if( pX->op==TK_ISNULL ){ - iReg = iTarget; - sqlite3VdbeAddOp2(v, OP_Null, 0, iReg); -#ifndef SQLITE_OMIT_SUBQUERY - }else{ - int eType; - int iTab; - struct InLoop *pIn; - WhereLoop *pLoop = pLevel->pWLoop; +/* +** Advance to the next WhereTerm that matches according to the criteria +** established when the pScan object was initialized by whereScanInit(). +** Return NULL if there are no more matching WhereTerms. +*/ +static WhereTerm *whereScanNext(WhereScan *pScan){ + int iCur; /* The cursor on the LHS of the term */ + int iColumn; /* The column on the LHS of the term. -1 for IPK */ + Expr *pX; /* An expression being tested */ + WhereClause *pWC; /* Shorthand for pScan->pWC */ + WhereTerm *pTerm; /* The term being tested */ + int k = pScan->k; /* Where to start scanning */ - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 - && pLoop->u.btree.pIndex!=0 - && pLoop->u.btree.pIndex->aSortOrder[iEq] - ){ - testcase( iEq==0 ); - testcase( bRev ); - bRev = !bRev; - } - assert( pX->op==TK_IN ); - iReg = iTarget; - eType = sqlite3FindInIndex(pParse, pX, IN_INDEX_LOOP, 0); - if( eType==IN_INDEX_INDEX_DESC ){ - testcase( bRev ); - bRev = !bRev; - } - iTab = pX->iTable; - sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iTab, 0); - VdbeCoverageIf(v, bRev); - VdbeCoverageIf(v, !bRev); - assert( (pLoop->wsFlags & WHERE_MULTI_OR)==0 ); - pLoop->wsFlags |= WHERE_IN_ABLE; - if( pLevel->u.in.nIn==0 ){ - pLevel->addrNxt = sqlite3VdbeMakeLabel(v); - } - pLevel->u.in.nIn++; - pLevel->u.in.aInLoop = - sqlite3DbReallocOrFree(pParse->db, pLevel->u.in.aInLoop, - sizeof(pLevel->u.in.aInLoop[0])*pLevel->u.in.nIn); - pIn = pLevel->u.in.aInLoop; - if( pIn ){ - pIn += pLevel->u.in.nIn - 1; - pIn->iCur = iTab; - if( eType==IN_INDEX_ROWID ){ - pIn->addrInTop = sqlite3VdbeAddOp2(v, OP_Rowid, iTab, iReg); - }else{ - pIn->addrInTop = sqlite3VdbeAddOp3(v, OP_Column, iTab, 0, iReg); + while( pScan->iEquiv<=pScan->nEquiv ){ + iCur = pScan->aEquiv[pScan->iEquiv-2]; + iColumn = pScan->aEquiv[pScan->iEquiv-1]; + while( (pWC = pScan->pWC)!=0 ){ + for(pTerm=pWC->a+k; k<pWC->nTerm; k++, pTerm++){ + if( pTerm->leftCursor==iCur + && pTerm->u.leftColumn==iColumn + && (pScan->iEquiv<=2 || !ExprHasProperty(pTerm->pExpr, EP_FromJoin)) + ){ + if( (pTerm->eOperator & WO_EQUIV)!=0 + && pScan->nEquiv<ArraySize(pScan->aEquiv) + ){ + int j; + pX = sqlite3ExprSkipCollate(pTerm->pExpr->pRight); + assert( pX->op==TK_COLUMN ); + for(j=0; j<pScan->nEquiv; j+=2){ + if( pScan->aEquiv[j]==pX->iTable + && pScan->aEquiv[j+1]==pX->iColumn ){ + break; + } + } + if( j==pScan->nEquiv ){ + pScan->aEquiv[j] = pX->iTable; + pScan->aEquiv[j+1] = pX->iColumn; + pScan->nEquiv += 2; + } + } + if( (pTerm->eOperator & pScan->opMask)!=0 ){ + /* Verify the affinity and collating sequence match */ + if( pScan->zCollName && (pTerm->eOperator & WO_ISNULL)==0 ){ + CollSeq *pColl; + Parse *pParse = pWC->pWInfo->pParse; + pX = pTerm->pExpr; + if( !sqlite3IndexAffinityOk(pX, pScan->idxaff) ){ + continue; + } + assert(pX->pLeft); + pColl = sqlite3BinaryCompareCollSeq(pParse, + pX->pLeft, pX->pRight); + if( pColl==0 ) pColl = pParse->db->pDfltColl; + if( sqlite3StrICmp(pColl->zName, pScan->zCollName) ){ + continue; + } + } + if( (pTerm->eOperator & (WO_EQ|WO_IS))!=0 + && (pX = pTerm->pExpr->pRight)->op==TK_COLUMN + && pX->iTable==pScan->aEquiv[0] + && pX->iColumn==pScan->aEquiv[1] + ){ + testcase( pTerm->eOperator & WO_IS ); + continue; + } + pScan->k = k+1; + return pTerm; + } + } } - pIn->eEndLoopOp = bRev ? OP_PrevIfOpen : OP_NextIfOpen; - sqlite3VdbeAddOp1(v, OP_IsNull, iReg); VdbeCoverage(v); - }else{ - pLevel->u.in.nIn = 0; + pScan->pWC = pScan->pWC->pOuter; + k = 0; } -#endif + pScan->pWC = pScan->pOrigWC; + k = 0; + pScan->iEquiv += 2; } - disableTerm(pLevel, pTerm); - return iReg; + return 0; } /* -** Generate code that will evaluate all == and IN constraints for an -** index scan. -** -** For example, consider table t1(a,b,c,d,e,f) with index i1(a,b,c). -** Suppose the WHERE clause is this: a==5 AND b IN (1,2,3) AND c>5 AND c<10 -** The index has as many as three equality constraints, but in this -** example, the third "c" value is an inequality. So only two -** constraints are coded. This routine will generate code to evaluate -** a==5 and b IN (1,2,3). The current values for a and b will be stored -** in consecutive registers and the index of the first register is returned. -** -** In the example above nEq==2. But this subroutine works for any value -** of nEq including 0. If nEq==0, this routine is nearly a no-op. -** The only thing it does is allocate the pLevel->iMem memory cell and -** compute the affinity string. -** -** The nExtraReg parameter is 0 or 1. It is 0 if all WHERE clause constraints -** are == or IN and are covered by the nEq. nExtraReg is 1 if there is -** an inequality constraint (such as the "c>=5 AND c<10" in the example) that -** occurs after the nEq quality constraints. -** -** This routine allocates a range of nEq+nExtraReg memory cells and returns -** the index of the first memory cell in that range. The code that -** calls this routine will use that memory range to store keys for -** start and termination conditions of the loop. -** key value of the loop. If one or more IN operators appear, then -** this routine allocates an additional nEq memory cells for internal -** use. +** Initialize a WHERE clause scanner object. Return a pointer to the +** first match. Return NULL if there are no matches. ** -** Before returning, *pzAff is set to point to a buffer containing a -** copy of the column affinity string of the index allocated using -** sqlite3DbMalloc(). Except, entries in the copy of the string associated -** with equality constraints that use NONE affinity are set to -** SQLITE_AFF_NONE. This is to deal with SQL such as the following: +** The scanner will be searching the WHERE clause pWC. It will look +** for terms of the form "X <op> <expr>" where X is column iColumn of table +** iCur. The <op> must be one of the operators described by opMask. ** -** CREATE TABLE t1(a TEXT PRIMARY KEY, b); -** SELECT ... FROM t1 AS t2, t1 WHERE t1.a = t2.b; +** If the search is for X and the WHERE clause contains terms of the +** form X=Y then this routine might also return terms of the form +** "Y <op> <expr>". The number of levels of transitivity is limited, +** but is enough to handle most commonly occurring SQL statements. ** -** In the example above, the index on t1(a) has TEXT affinity. But since -** the right hand side of the equality constraint (t2.b) has NONE affinity, -** no conversion should be attempted before using a t2.b value as part of -** a key to search the index. Hence the first byte in the returned affinity -** string in this example would be set to SQLITE_AFF_NONE. +** If X is not the INTEGER PRIMARY KEY then X must be compatible with +** index pIdx. */ -static int codeAllEqualityTerms( - Parse *pParse, /* Parsing context */ - WhereLevel *pLevel, /* Which nested loop of the FROM we are coding */ - int bRev, /* Reverse the order of IN operators */ - int nExtraReg, /* Number of extra registers to allocate */ - char **pzAff /* OUT: Set to point to affinity string */ +static WhereTerm *whereScanInit( + WhereScan *pScan, /* The WhereScan object being initialized */ + WhereClause *pWC, /* The WHERE clause to be scanned */ + int iCur, /* Cursor to scan for */ + int iColumn, /* Column to scan for */ + u32 opMask, /* Operator(s) to scan for */ + Index *pIdx /* Must be compatible with this index */ ){ - u16 nEq; /* The number of == or IN constraints to code */ - u16 nSkip; /* Number of left-most columns to skip */ - Vdbe *v = pParse->pVdbe; /* The vm under construction */ - Index *pIdx; /* The index being used for this loop */ - WhereTerm *pTerm; /* A single constraint term */ - WhereLoop *pLoop; /* The WhereLoop object */ - int j; /* Loop counter */ - int regBase; /* Base register */ - int nReg; /* Number of registers to allocate */ - char *zAff; /* Affinity string to return */ - - /* This module is only called on query plans that use an index. */ - pLoop = pLevel->pWLoop; - assert( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 ); - nEq = pLoop->u.btree.nEq; - nSkip = pLoop->nSkip; - pIdx = pLoop->u.btree.pIndex; - assert( pIdx!=0 ); - - /* Figure out how many memory cells we will need then allocate them. - */ - regBase = pParse->nMem + 1; - nReg = pLoop->u.btree.nEq + nExtraReg; - pParse->nMem += nReg; - - zAff = sqlite3DbStrDup(pParse->db, sqlite3IndexAffinityStr(v, pIdx)); - if( !zAff ){ - pParse->db->mallocFailed = 1; - } - - if( nSkip ){ - int iIdxCur = pLevel->iIdxCur; - sqlite3VdbeAddOp1(v, (bRev?OP_Last:OP_Rewind), iIdxCur); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - VdbeComment((v, "begin skip-scan on %s", pIdx->zName)); - j = sqlite3VdbeAddOp0(v, OP_Goto); - pLevel->addrSkip = sqlite3VdbeAddOp4Int(v, (bRev?OP_SeekLT:OP_SeekGT), - iIdxCur, 0, regBase, nSkip); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - sqlite3VdbeJumpHere(v, j); - for(j=0; j<nSkip; j++){ - sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, j, regBase+j); - assert( pIdx->aiColumn[j]>=0 ); - VdbeComment((v, "%s", pIdx->pTable->aCol[pIdx->aiColumn[j]].zName)); - } - } + int j; - /* Evaluate the equality constraints - */ - assert( zAff==0 || (int)strlen(zAff)>=nEq ); - for(j=nSkip; j<nEq; j++){ - int r1; - pTerm = pLoop->aLTerm[j]; - assert( pTerm!=0 ); - /* The following testcase is true for indices with redundant columns. - ** Ex: CREATE INDEX i1 ON t1(a,b,a); SELECT * FROM t1 WHERE a=0 AND b=0; */ - testcase( (pTerm->wtFlags & TERM_CODED)!=0 ); - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - r1 = codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, regBase+j); - if( r1!=regBase+j ){ - if( nReg==1 ){ - sqlite3ReleaseTempReg(pParse, regBase); - regBase = r1; - }else{ - sqlite3VdbeAddOp2(v, OP_SCopy, r1, regBase+j); - } - } - testcase( pTerm->eOperator & WO_ISNULL ); - testcase( pTerm->eOperator & WO_IN ); - if( (pTerm->eOperator & (WO_ISNULL|WO_IN))==0 ){ - Expr *pRight = pTerm->pExpr->pRight; - if( sqlite3ExprCanBeNull(pRight) ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+j, pLevel->addrBrk); - VdbeCoverage(v); - } - if( zAff ){ - if( sqlite3CompareAffinity(pRight, zAff[j])==SQLITE_AFF_NONE ){ - zAff[j] = SQLITE_AFF_NONE; - } - if( sqlite3ExprNeedsNoAffinityChange(pRight, zAff[j]) ){ - zAff[j] = SQLITE_AFF_NONE; - } - } + /* memset(pScan, 0, sizeof(*pScan)); */ + pScan->pOrigWC = pWC; + pScan->pWC = pWC; + if( pIdx && iColumn>=0 ){ + pScan->idxaff = pIdx->pTable->aCol[iColumn].affinity; + for(j=0; pIdx->aiColumn[j]!=iColumn; j++){ + if( NEVER(j>pIdx->nColumn) ) return 0; } + pScan->zCollName = pIdx->azColl[j]; + }else{ + pScan->idxaff = 0; + pScan->zCollName = 0; } - *pzAff = zAff; - return regBase; + pScan->opMask = opMask; + pScan->k = 0; + pScan->aEquiv[0] = iCur; + pScan->aEquiv[1] = iColumn; + pScan->nEquiv = 2; + pScan->iEquiv = 2; + return whereScanNext(pScan); } -#ifndef SQLITE_OMIT_EXPLAIN /* -** This routine is a helper for explainIndexRange() below +** Search for a term in the WHERE clause that is of the form "X <op> <expr>" +** where X is a reference to the iColumn of table iCur and <op> is one of +** the WO_xx operator codes specified by the op parameter. +** Return a pointer to the term. Return 0 if not found. ** -** pStr holds the text of an expression that we are building up one term -** at a time. This routine adds a new term to the end of the expression. -** Terms are separated by AND so add the "AND" text for second and subsequent -** terms only. +** The term returned might by Y=<expr> if there is another constraint in +** the WHERE clause that specifies that X=Y. Any such constraints will be +** identified by the WO_EQUIV bit in the pTerm->eOperator field. The +** aEquiv[] array holds X and all its equivalents, with each SQL variable +** taking up two slots in aEquiv[]. The first slot is for the cursor number +** and the second is for the column number. There are 22 slots in aEquiv[] +** so that means we can look for X plus up to 10 other equivalent values. +** Hence a search for X will return <expr> if X=A1 and A1=A2 and A2=A3 +** and ... and A9=A10 and A10=<expr>. +** +** If there are multiple terms in the WHERE clause of the form "X <op> <expr>" +** then try for the one with no dependencies on <expr> - in other words where +** <expr> is a constant expression of some kind. Only return entries of +** the form "X <op> Y" where Y is a column in another table if no terms of +** the form "X <op> <const-expr>" exist. If no terms with a constant RHS +** exist, try to return a term that does not use WO_EQUIV. */ -static void explainAppendTerm( - StrAccum *pStr, /* The text expression being built */ - int iTerm, /* Index of this term. First is zero */ - const char *zColumn, /* Name of the column */ - const char *zOp /* Name of the operator */ +SQLITE_PRIVATE WhereTerm *sqlite3WhereFindTerm( + WhereClause *pWC, /* The WHERE clause to be searched */ + int iCur, /* Cursor number of LHS */ + int iColumn, /* Column number of LHS */ + Bitmask notReady, /* RHS must not overlap with this mask */ + u32 op, /* Mask of WO_xx values describing operator */ + Index *pIdx /* Must be compatible with this index, if not NULL */ ){ - if( iTerm ) sqlite3StrAccumAppend(pStr, " AND ", 5); - sqlite3StrAccumAppendAll(pStr, zColumn); - sqlite3StrAccumAppend(pStr, zOp, 1); - sqlite3StrAccumAppend(pStr, "?", 1); + WhereTerm *pResult = 0; + WhereTerm *p; + WhereScan scan; + + p = whereScanInit(&scan, pWC, iCur, iColumn, op, pIdx); + op &= WO_EQ|WO_IS; + while( p ){ + if( (p->prereqRight & notReady)==0 ){ + if( p->prereqRight==0 && (p->eOperator&op)!=0 ){ + testcase( p->eOperator & WO_IS ); + return p; + } + if( pResult==0 ) pResult = p; + } + p = whereScanNext(&scan); + } + return pResult; } /* -** Argument pLevel describes a strategy for scanning table pTab. This -** function appends text to pStr that describes the subset of table -** rows scanned by the strategy in the form of an SQL expression. -** -** For example, if the query: -** -** SELECT * FROM t1 WHERE a=1 AND b>2; -** -** is run and there is an index on (a, b), then this function returns a -** string similar to: +** This function searches pList for an entry that matches the iCol-th column +** of index pIdx. ** -** "a=? AND b>?" +** If such an expression is found, its index in pList->a[] is returned. If +** no expression is found, -1 is returned. */ -static void explainIndexRange(StrAccum *pStr, WhereLoop *pLoop, Table *pTab){ - Index *pIndex = pLoop->u.btree.pIndex; - u16 nEq = pLoop->u.btree.nEq; - u16 nSkip = pLoop->nSkip; - int i, j; - Column *aCol = pTab->aCol; - i16 *aiColumn = pIndex->aiColumn; - - if( nEq==0 && (pLoop->wsFlags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))==0 ) return; - sqlite3StrAccumAppend(pStr, " (", 2); - for(i=0; i<nEq; i++){ - char *z = aiColumn[i] < 0 ? "rowid" : aCol[aiColumn[i]].zName; - if( i>=nSkip ){ - explainAppendTerm(pStr, i, z, "="); - }else{ - if( i ) sqlite3StrAccumAppend(pStr, " AND ", 5); - sqlite3XPrintf(pStr, 0, "ANY(%s)", z); +static int findIndexCol( + Parse *pParse, /* Parse context */ + ExprList *pList, /* Expression list to search */ + int iBase, /* Cursor for table associated with pIdx */ + Index *pIdx, /* Index to match column of */ + int iCol /* Column of index to match */ +){ + int i; + const char *zColl = pIdx->azColl[iCol]; + + for(i=0; i<pList->nExpr; i++){ + Expr *p = sqlite3ExprSkipCollate(pList->a[i].pExpr); + if( p->op==TK_COLUMN + && p->iColumn==pIdx->aiColumn[iCol] + && p->iTable==iBase + ){ + CollSeq *pColl = sqlite3ExprCollSeq(pParse, pList->a[i].pExpr); + if( pColl && 0==sqlite3StrICmp(pColl->zName, zColl) ){ + return i; + } } } - j = i; - if( pLoop->wsFlags&WHERE_BTM_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; - explainAppendTerm(pStr, i++, z, ">"); - } - if( pLoop->wsFlags&WHERE_TOP_LIMIT ){ - char *z = aiColumn[j] < 0 ? "rowid" : aCol[aiColumn[j]].zName; - explainAppendTerm(pStr, i, z, "<"); - } - sqlite3StrAccumAppend(pStr, ")", 1); + return -1; } /* -** This function is a no-op unless currently processing an EXPLAIN QUERY PLAN -** command, or if either SQLITE_DEBUG or SQLITE_ENABLE_STMT_SCANSTATUS was -** defined at compile-time. If it is not a no-op, a single OP_Explain opcode -** is added to the output to describe the table scan strategy in pLevel. +** Return true if the DISTINCT expression-list passed as the third argument +** is redundant. ** -** If an OP_Explain opcode is added to the VM, its address is returned. -** Otherwise, if no OP_Explain is coded, zero is returned. +** A DISTINCT list is redundant if any subset of the columns in the +** DISTINCT list are collectively unique and individually non-null. */ -static int explainOneScan( - Parse *pParse, /* Parse context */ - SrcList *pTabList, /* Table list this loop refers to */ - WhereLevel *pLevel, /* Scan to write OP_Explain opcode for */ - int iLevel, /* Value for "level" column of output */ - int iFrom, /* Value for "from" column of output */ - u16 wctrlFlags /* Flags passed to sqlite3WhereBegin() */ +static int isDistinctRedundant( + Parse *pParse, /* Parsing context */ + SrcList *pTabList, /* The FROM clause */ + WhereClause *pWC, /* The WHERE clause */ + ExprList *pDistinct /* The result set that needs to be DISTINCT */ ){ - int ret = 0; -#if !defined(SQLITE_DEBUG) && !defined(SQLITE_ENABLE_STMT_SCANSTATUS) - if( pParse->explain==2 ) -#endif - { - struct SrcList_item *pItem = &pTabList->a[pLevel->iFrom]; - Vdbe *v = pParse->pVdbe; /* VM being constructed */ - sqlite3 *db = pParse->db; /* Database handle */ - int iId = pParse->iSelectId; /* Select id (left-most output column) */ - int isSearch; /* True for a SEARCH. False for SCAN. */ - WhereLoop *pLoop; /* The controlling WhereLoop object */ - u32 flags; /* Flags that describe this loop */ - char *zMsg; /* Text to add to EQP output */ - StrAccum str; /* EQP output string */ - char zBuf[100]; /* Initial space for EQP output string */ - - pLoop = pLevel->pWLoop; - flags = pLoop->wsFlags; - if( (flags&WHERE_MULTI_OR) || (wctrlFlags&WHERE_ONETABLE_ONLY) ) return 0; - - isSearch = (flags&(WHERE_BTM_LIMIT|WHERE_TOP_LIMIT))!=0 - || ((flags&WHERE_VIRTUALTABLE)==0 && (pLoop->u.btree.nEq>0)) - || (wctrlFlags&(WHERE_ORDERBY_MIN|WHERE_ORDERBY_MAX)); + Table *pTab; + Index *pIdx; + int i; + int iBase; - sqlite3StrAccumInit(&str, db, zBuf, sizeof(zBuf), SQLITE_MAX_LENGTH); - sqlite3StrAccumAppendAll(&str, isSearch ? "SEARCH" : "SCAN"); - if( pItem->pSelect ){ - sqlite3XPrintf(&str, 0, " SUBQUERY %d", pItem->iSelectId); - }else{ - sqlite3XPrintf(&str, 0, " TABLE %s", pItem->zName); - } + /* If there is more than one table or sub-select in the FROM clause of + ** this query, then it will not be possible to show that the DISTINCT + ** clause is redundant. */ + if( pTabList->nSrc!=1 ) return 0; + iBase = pTabList->a[0].iCursor; + pTab = pTabList->a[0].pTab; - if( pItem->zAlias ){ - sqlite3XPrintf(&str, 0, " AS %s", pItem->zAlias); - } - if( (flags & (WHERE_IPK|WHERE_VIRTUALTABLE))==0 ){ - const char *zFmt = 0; - Index *pIdx; + /* If any of the expressions is an IPK column on table iBase, then return + ** true. Note: The (p->iTable==iBase) part of this test may be false if the + ** current SELECT is a correlated sub-query. + */ + for(i=0; i<pDistinct->nExpr; i++){ + Expr *p = sqlite3ExprSkipCollate(pDistinct->a[i].pExpr); + if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1; + } - assert( pLoop->u.btree.pIndex!=0 ); - pIdx = pLoop->u.btree.pIndex; - assert( !(flags&WHERE_AUTO_INDEX) || (flags&WHERE_IDX_ONLY) ); - if( !HasRowid(pItem->pTab) && IsPrimaryKeyIndex(pIdx) ){ - if( isSearch ){ - zFmt = "PRIMARY KEY"; + /* Loop through all indices on the table, checking each to see if it makes + ** the DISTINCT qualifier redundant. It does so if: + ** + ** 1. The index is itself UNIQUE, and + ** + ** 2. All of the columns in the index are either part of the pDistinct + ** list, or else the WHERE clause contains a term of the form "col=X", + ** where X is a constant value. The collation sequences of the + ** comparison and select-list expressions must match those of the index. + ** + ** 3. All of those index columns for which the WHERE clause does not + ** contain a "col=X" term are subject to a NOT NULL constraint. + */ + for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + if( !IsUniqueIndex(pIdx) ) continue; + for(i=0; i<pIdx->nKeyCol; i++){ + i16 iCol = pIdx->aiColumn[i]; + if( 0==sqlite3WhereFindTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) ){ + int iIdxCol = findIndexCol(pParse, pDistinct, iBase, pIdx, i); + if( iIdxCol<0 || pTab->aCol[iCol].notNull==0 ){ + break; } - }else if( flags & WHERE_PARTIALIDX ){ - zFmt = "AUTOMATIC PARTIAL COVERING INDEX"; - }else if( flags & WHERE_AUTO_INDEX ){ - zFmt = "AUTOMATIC COVERING INDEX"; - }else if( flags & WHERE_IDX_ONLY ){ - zFmt = "COVERING INDEX %s"; - }else{ - zFmt = "INDEX %s"; - } - if( zFmt ){ - sqlite3StrAccumAppend(&str, " USING ", 7); - sqlite3XPrintf(&str, 0, zFmt, pIdx->zName); - explainIndexRange(&str, pLoop, pItem->pTab); - } - }else if( (flags & WHERE_IPK)!=0 && (flags & WHERE_CONSTRAINT)!=0 ){ - const char *zRange; - if( flags&(WHERE_COLUMN_EQ|WHERE_COLUMN_IN) ){ - zRange = "(rowid=?)"; - }else if( (flags&WHERE_BOTH_LIMIT)==WHERE_BOTH_LIMIT ){ - zRange = "(rowid>? AND rowid<?)"; - }else if( flags&WHERE_BTM_LIMIT ){ - zRange = "(rowid>?)"; - }else{ - assert( flags&WHERE_TOP_LIMIT); - zRange = "(rowid<?)"; } - sqlite3StrAccumAppendAll(&str, " USING INTEGER PRIMARY KEY "); - sqlite3StrAccumAppendAll(&str, zRange); - } -#ifndef SQLITE_OMIT_VIRTUALTABLE - else if( (flags & WHERE_VIRTUALTABLE)!=0 ){ - sqlite3XPrintf(&str, 0, " VIRTUAL TABLE INDEX %d:%s", - pLoop->u.vtab.idxNum, pLoop->u.vtab.idxStr); } -#endif -#ifdef SQLITE_EXPLAIN_ESTIMATED_ROWS - if( pLoop->nOut>=10 ){ - sqlite3XPrintf(&str, 0, " (~%llu rows)", sqlite3LogEstToInt(pLoop->nOut)); - }else{ - sqlite3StrAccumAppend(&str, " (~1 row)", 9); + if( i==pIdx->nKeyCol ){ + /* This index implies that the DISTINCT qualifier is redundant. */ + return 1; } -#endif - zMsg = sqlite3StrAccumFinish(&str); - ret = sqlite3VdbeAddOp4(v, OP_Explain, iId, iLevel, iFrom, zMsg,P4_DYNAMIC); } - return ret; + + return 0; } -#else -# define explainOneScan(u,v,w,x,y,z) 0 -#endif /* SQLITE_OMIT_EXPLAIN */ -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS + /* -** Configure the VM passed as the first argument with an -** sqlite3_stmt_scanstatus() entry corresponding to the scan used to -** implement level pLvl. Argument pSrclist is a pointer to the FROM -** clause that the scan reads data from. +** Estimate the logarithm of the input value to base 2. +*/ +static LogEst estLog(LogEst N){ + return N<=10 ? 0 : sqlite3LogEst(N) - 33; +} + +/* +** Convert OP_Column opcodes to OP_Copy in previously generated code. ** -** If argument addrExplain is not 0, it must be the address of an -** OP_Explain instruction that describes the same loop. +** This routine runs over generated VDBE code and translates OP_Column +** opcodes into OP_Copy, and OP_Rowid into OP_Null, when the table is being +** accessed via co-routine instead of via table lookup. */ -static void addScanStatus( - Vdbe *v, /* Vdbe to add scanstatus entry to */ - SrcList *pSrclist, /* FROM clause pLvl reads data from */ - WhereLevel *pLvl, /* Level to add scanstatus() entry for */ - int addrExplain /* Address of OP_Explain (or 0) */ +static void translateColumnToCopy( + Vdbe *v, /* The VDBE containing code to translate */ + int iStart, /* Translate from this opcode to the end */ + int iTabCur, /* OP_Column/OP_Rowid references to this table */ + int iRegister /* The first column is in this register */ ){ - const char *zObj = 0; - WhereLoop *pLoop = pLvl->pWLoop; - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)==0 && pLoop->u.btree.pIndex!=0 ){ - zObj = pLoop->u.btree.pIndex->zName; - }else{ - zObj = pSrclist->a[pLvl->iFrom].zName; + VdbeOp *pOp = sqlite3VdbeGetOp(v, iStart); + int iEnd = sqlite3VdbeCurrentAddr(v); + for(; iStart<iEnd; iStart++, pOp++){ + if( pOp->p1!=iTabCur ) continue; + if( pOp->opcode==OP_Column ){ + pOp->opcode = OP_Copy; + pOp->p1 = pOp->p2 + iRegister; + pOp->p2 = pOp->p3; + pOp->p3 = 0; + }else if( pOp->opcode==OP_Rowid ){ + pOp->opcode = OP_Null; + pOp->p1 = 0; + pOp->p3 = 0; + } } - sqlite3VdbeScanStatus( - v, addrExplain, pLvl->addrBody, pLvl->addrVisit, pLoop->nOut, zObj - ); +} + +/* +** Two routines for printing the content of an sqlite3_index_info +** structure. Used for testing and debugging only. If neither +** SQLITE_TEST or SQLITE_DEBUG are defined, then these routines +** are no-ops. +*/ +#if !defined(SQLITE_OMIT_VIRTUALTABLE) && defined(WHERETRACE_ENABLED) +static void TRACE_IDX_INPUTS(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; i<p->nConstraint; i++){ + sqlite3DebugPrintf(" constraint[%d]: col=%d termid=%d op=%d usabled=%d\n", + i, + p->aConstraint[i].iColumn, + p->aConstraint[i].iTermOffset, + p->aConstraint[i].op, + p->aConstraint[i].usable); + } + for(i=0; i<p->nOrderBy; i++){ + sqlite3DebugPrintf(" orderby[%d]: col=%d desc=%d\n", + i, + p->aOrderBy[i].iColumn, + p->aOrderBy[i].desc); + } +} +static void TRACE_IDX_OUTPUTS(sqlite3_index_info *p){ + int i; + if( !sqlite3WhereTrace ) return; + for(i=0; i<p->nConstraint; i++){ + sqlite3DebugPrintf(" usage[%d]: argvIdx=%d omit=%d\n", + i, + p->aConstraintUsage[i].argvIndex, + p->aConstraintUsage[i].omit); + } + sqlite3DebugPrintf(" idxNum=%d\n", p->idxNum); + sqlite3DebugPrintf(" idxStr=%s\n", p->idxStr); + sqlite3DebugPrintf(" orderByConsumed=%d\n", p->orderByConsumed); + sqlite3DebugPrintf(" estimatedCost=%g\n", p->estimatedCost); + sqlite3DebugPrintf(" estimatedRows=%lld\n", p->estimatedRows); } #else -# define addScanStatus(a, b, c, d) ((void)d) +#define TRACE_IDX_INPUTS(A) +#define TRACE_IDX_OUTPUTS(A) #endif +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* -** If the most recently coded instruction is a constant range contraint -** that originated from the LIKE optimization, then change the P3 to be -** pLoop->iLikeRepCntr and set P5. -** -** The LIKE optimization trys to evaluate "x LIKE 'abc%'" as a range -** expression: "x>='ABC' AND x<'abd'". But this requires that the range -** scan loop run twice, once for strings and a second time for BLOBs. -** The OP_String opcodes on the second pass convert the upper and lower -** bound string contants to blobs. This routine makes the necessary changes -** to the OP_String opcodes for that to happen. +** Return TRUE if the WHERE clause term pTerm is of a form where it +** could be used with an index to access pSrc, assuming an appropriate +** index existed. */ -static void whereLikeOptimizationStringFixup( - Vdbe *v, /* prepared statement under construction */ - WhereLevel *pLevel, /* The loop that contains the LIKE operator */ - WhereTerm *pTerm /* The upper or lower bound just coded */ +static int termCanDriveIndex( + WhereTerm *pTerm, /* WHERE clause term to check */ + struct SrcList_item *pSrc, /* Table we are trying to access */ + Bitmask notReady /* Tables in outer loops of the join */ ){ - if( pTerm->wtFlags & TERM_LIKEOPT ){ - VdbeOp *pOp; - assert( pLevel->iLikeRepCntr>0 ); - pOp = sqlite3VdbeGetOp(v, -1); - assert( pOp!=0 ); - assert( pOp->opcode==OP_String8 - || pTerm->pWC->pWInfo->pParse->db->mallocFailed ); - pOp->p3 = pLevel->iLikeRepCntr; - pOp->p5 = 1; - } + char aff; + if( pTerm->leftCursor!=pSrc->iCursor ) return 0; + if( (pTerm->eOperator & (WO_EQ|WO_IS))==0 ) return 0; + if( (pTerm->prereqRight & notReady)!=0 ) return 0; + if( pTerm->u.leftColumn<0 ) return 0; + aff = pSrc->pTab->aCol[pTerm->u.leftColumn].affinity; + if( !sqlite3IndexAffinityOk(pTerm->pExpr, aff) ) return 0; + testcase( pTerm->pExpr->op==TK_IS ); + return 1; } +#endif + +#ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* -** Generate code for the start of the iLevel-th loop in the WHERE clause -** implementation described by pWInfo. +** Generate code to construct the Index object for an automatic index +** and to set up the WhereLevel object pLevel so that the code generator +** makes use of the automatic index. */ -static Bitmask codeOneLoopStart( - WhereInfo *pWInfo, /* Complete information about the WHERE clause */ - int iLevel, /* Which level of pWInfo->a[] should be coded */ - Bitmask notReady /* Which tables are currently available */ +static void constructAutomaticIndex( + Parse *pParse, /* The parsing context */ + WhereClause *pWC, /* The WHERE clause */ + struct SrcList_item *pSrc, /* The FROM clause term to get the next index */ + Bitmask notReady, /* Mask of cursors that are not available */ + WhereLevel *pLevel /* Write new index here */ ){ - int j, k; /* Loop counters */ - int iCur; /* The VDBE cursor for the table */ - int addrNxt; /* Where to jump to continue with the next IN case */ - int omitTable; /* True if we use the index only */ - int bRev; /* True if we need to scan in reverse order */ - WhereLevel *pLevel; /* The where level to be coded */ - WhereLoop *pLoop; /* The WhereLoop object being coded */ - WhereClause *pWC; /* Decomposition of the entire WHERE clause */ - WhereTerm *pTerm; /* A WHERE clause term */ - Parse *pParse; /* Parsing context */ - sqlite3 *db; /* Database connection */ - Vdbe *v; /* The prepared stmt under constructions */ - struct SrcList_item *pTabItem; /* FROM clause term being coded */ - int addrBrk; /* Jump here to break out of the loop */ - int addrCont; /* Jump here to continue with next cycle */ - int iRowidReg = 0; /* Rowid is stored in this register, if not zero */ - int iReleaseReg = 0; /* Temp register to free before returning */ + int nKeyCol; /* Number of columns in the constructed index */ + WhereTerm *pTerm; /* A single term of the WHERE clause */ + WhereTerm *pWCEnd; /* End of pWC->a[] */ + Index *pIdx; /* Object describing the transient index */ + Vdbe *v; /* Prepared statement under construction */ + int addrInit; /* Address of the initialization bypass jump */ + Table *pTable; /* The table being indexed */ + int addrTop; /* Top of the index fill loop */ + int regRecord; /* Register holding an index record */ + int n; /* Column counter */ + int i; /* Loop counter */ + int mxBitCol; /* Maximum column in pSrc->colUsed */ + CollSeq *pColl; /* Collating sequence to on a column */ + WhereLoop *pLoop; /* The Loop object */ + char *zNotUsed; /* Extra space on the end of pIdx */ + Bitmask idxCols; /* Bitmap of columns used for indexing */ + Bitmask extraCols; /* Bitmap of additional columns */ + u8 sentWarning = 0; /* True if a warnning has been issued */ + Expr *pPartial = 0; /* Partial Index Expression */ + int iContinue = 0; /* Jump here to skip excluded rows */ + struct SrcList_item *pTabItem; /* FROM clause term being indexed */ - pParse = pWInfo->pParse; + /* Generate code to skip over the creation and initialization of the + ** transient index on 2nd and subsequent iterations of the loop. */ v = pParse->pVdbe; - pWC = &pWInfo->sWC; - db = pParse->db; - pLevel = &pWInfo->a[iLevel]; + assert( v!=0 ); + addrInit = sqlite3CodeOnce(pParse); VdbeCoverage(v); + + /* Count the number of columns that will be added to the index + ** and used to match WHERE clause constraints */ + nKeyCol = 0; + pTable = pSrc->pTab; + pWCEnd = &pWC->a[pWC->nTerm]; pLoop = pLevel->pWLoop; - pTabItem = &pWInfo->pTabList->a[pLevel->iFrom]; - iCur = pTabItem->iCursor; - pLevel->notReady = notReady & ~getMask(&pWInfo->sMaskSet, iCur); - bRev = (pWInfo->revMask>>iLevel)&1; - omitTable = (pLoop->wsFlags & WHERE_IDX_ONLY)!=0 - && (pWInfo->wctrlFlags & WHERE_FORCE_TABLE)==0; - VdbeModuleComment((v, "Begin WHERE-loop%d: %s",iLevel,pTabItem->pTab->zName)); + idxCols = 0; + for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){ + Expr *pExpr = pTerm->pExpr; + assert( !ExprHasProperty(pExpr, EP_FromJoin) /* prereq always non-zero */ + || pExpr->iRightJoinTable!=pSrc->iCursor /* for the right-hand */ + || pLoop->prereq!=0 ); /* table of a LEFT JOIN */ + if( pLoop->prereq==0 + && (pTerm->wtFlags & TERM_VIRTUAL)==0 + && !ExprHasProperty(pExpr, EP_FromJoin) + && sqlite3ExprIsTableConstant(pExpr, pSrc->iCursor) ){ + pPartial = sqlite3ExprAnd(pParse->db, pPartial, + sqlite3ExprDup(pParse->db, pExpr, 0)); + } + if( termCanDriveIndex(pTerm, pSrc, notReady) ){ + int iCol = pTerm->u.leftColumn; + Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS ); + testcase( iCol==BMS-1 ); + if( !sentWarning ){ + sqlite3_log(SQLITE_WARNING_AUTOINDEX, + "automatic index on %s(%s)", pTable->zName, + pTable->aCol[iCol].zName); + sentWarning = 1; + } + if( (idxCols & cMask)==0 ){ + if( whereLoopResize(pParse->db, pLoop, nKeyCol+1) ){ + goto end_auto_index_create; + } + pLoop->aLTerm[nKeyCol++] = pTerm; + idxCols |= cMask; + } + } + } + assert( nKeyCol>0 ); + pLoop->u.btree.nEq = pLoop->nLTerm = nKeyCol; + pLoop->wsFlags = WHERE_COLUMN_EQ | WHERE_IDX_ONLY | WHERE_INDEXED + | WHERE_AUTO_INDEX; - /* Create labels for the "break" and "continue" instructions - ** for the current loop. Jump to addrBrk to break out of a loop. - ** Jump to cont to go immediately to the next iteration of the - ** loop. - ** - ** When there is an IN operator, we also have a "addrNxt" label that - ** means to continue with the next IN value combination. When - ** there are no IN operators in the constraints, the "addrNxt" label - ** is the same as "addrBrk". + /* Count the number of additional columns needed to create a + ** covering index. A "covering index" is an index that contains all + ** columns that are needed by the query. With a covering index, the + ** original table never needs to be accessed. Automatic indices must + ** be a covering index because the index will not be updated if the + ** original table changes and the index and table cannot both be used + ** if they go out of sync. */ - addrBrk = pLevel->addrBrk = pLevel->addrNxt = sqlite3VdbeMakeLabel(v); - addrCont = pLevel->addrCont = sqlite3VdbeMakeLabel(v); + extraCols = pSrc->colUsed & (~idxCols | MASKBIT(BMS-1)); + mxBitCol = MIN(BMS-1,pTable->nCol); + testcase( pTable->nCol==BMS-1 ); + testcase( pTable->nCol==BMS-2 ); + for(i=0; i<mxBitCol; i++){ + if( extraCols & MASKBIT(i) ) nKeyCol++; + } + if( pSrc->colUsed & MASKBIT(BMS-1) ){ + nKeyCol += pTable->nCol - BMS + 1; + } - /* If this is the right table of a LEFT OUTER JOIN, allocate and - ** initialize a memory cell that records if this table matches any - ** row of the left table of the join. - */ - if( pLevel->iFrom>0 && (pTabItem[0].jointype & JT_LEFT)!=0 ){ - pLevel->iLeftJoin = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Integer, 0, pLevel->iLeftJoin); - VdbeComment((v, "init LEFT JOIN no-match flag")); + /* Construct the Index object to describe this index */ + pIdx = sqlite3AllocateIndexObject(pParse->db, nKeyCol+1, 0, &zNotUsed); + if( pIdx==0 ) goto end_auto_index_create; + pLoop->u.btree.pIndex = pIdx; + pIdx->zName = "auto-index"; + pIdx->pTable = pTable; + n = 0; + idxCols = 0; + for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){ + if( termCanDriveIndex(pTerm, pSrc, notReady) ){ + int iCol = pTerm->u.leftColumn; + Bitmask cMask = iCol>=BMS ? MASKBIT(BMS-1) : MASKBIT(iCol); + testcase( iCol==BMS-1 ); + testcase( iCol==BMS ); + if( (idxCols & cMask)==0 ){ + Expr *pX = pTerm->pExpr; + idxCols |= cMask; + pIdx->aiColumn[n] = pTerm->u.leftColumn; + pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight); + pIdx->azColl[n] = pColl ? pColl->zName : "BINARY"; + n++; + } + } } + assert( (u32)n==pLoop->u.btree.nEq ); - /* Special case of a FROM clause subquery implemented as a co-routine */ + /* Add additional columns needed to make the automatic index into + ** a covering index */ + for(i=0; i<mxBitCol; i++){ + if( extraCols & MASKBIT(i) ){ + pIdx->aiColumn[n] = i; + pIdx->azColl[n] = "BINARY"; + n++; + } + } + if( pSrc->colUsed & MASKBIT(BMS-1) ){ + for(i=BMS-1; i<pTable->nCol; i++){ + pIdx->aiColumn[n] = i; + pIdx->azColl[n] = "BINARY"; + n++; + } + } + assert( n==nKeyCol ); + pIdx->aiColumn[n] = -1; + pIdx->azColl[n] = "BINARY"; + + /* Create the automatic index */ + assert( pLevel->iIdxCur>=0 ); + pLevel->iIdxCur = pParse->nTab++; + sqlite3VdbeAddOp2(v, OP_OpenAutoindex, pLevel->iIdxCur, nKeyCol+1); + sqlite3VdbeSetP4KeyInfo(pParse, pIdx); + VdbeComment((v, "for %s", pTable->zName)); + + /* Fill the automatic index with content */ + sqlite3ExprCachePush(pParse); + pTabItem = &pWC->pWInfo->pTabList->a[pLevel->iFrom]; if( pTabItem->viaCoroutine ){ int regYield = pTabItem->regReturn; sqlite3VdbeAddOp3(v, OP_InitCoroutine, regYield, 0, pTabItem->addrFillSub); - pLevel->p2 = sqlite3VdbeAddOp2(v, OP_Yield, regYield, addrBrk); + addrTop = sqlite3VdbeAddOp1(v, OP_Yield, regYield); VdbeCoverage(v); VdbeComment((v, "next row of \"%s\"", pTabItem->pTab->zName)); - pLevel->op = OP_Goto; - }else + }else{ + addrTop = sqlite3VdbeAddOp1(v, OP_Rewind, pLevel->iTabCur); VdbeCoverage(v); + } + if( pPartial ){ + iContinue = sqlite3VdbeMakeLabel(v); + sqlite3ExprIfFalse(pParse, pPartial, iContinue, SQLITE_JUMPIFNULL); + pLoop->wsFlags |= WHERE_PARTIALIDX; + } + regRecord = sqlite3GetTempReg(pParse); + sqlite3GenerateIndexKey(pParse, pIdx, pLevel->iTabCur, regRecord, 0, 0, 0, 0); + sqlite3VdbeAddOp2(v, OP_IdxInsert, pLevel->iIdxCur, regRecord); + sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); + if( pPartial ) sqlite3VdbeResolveLabel(v, iContinue); + if( pTabItem->viaCoroutine ){ + translateColumnToCopy(v, addrTop, pLevel->iTabCur, pTabItem->regResult); + sqlite3VdbeAddOp2(v, OP_Goto, 0, addrTop); + pTabItem->viaCoroutine = 0; + }else{ + sqlite3VdbeAddOp2(v, OP_Next, pLevel->iTabCur, addrTop+1); VdbeCoverage(v); + } + sqlite3VdbeChangeP5(v, SQLITE_STMTSTATUS_AUTOINDEX); + sqlite3VdbeJumpHere(v, addrTop); + sqlite3ReleaseTempReg(pParse, regRecord); + sqlite3ExprCachePop(pParse); + + /* Jump here when skipping the initialization */ + sqlite3VdbeJumpHere(v, addrInit); + +end_auto_index_create: + sqlite3ExprDelete(pParse->db, pPartial); +} +#endif /* SQLITE_OMIT_AUTOMATIC_INDEX */ #ifndef SQLITE_OMIT_VIRTUALTABLE - if( (pLoop->wsFlags & WHERE_VIRTUALTABLE)!=0 ){ - /* Case 1: The table is a virtual-table. Use the VFilter and VNext - ** to access the data. - */ - int iReg; /* P3 Value for OP_VFilter */ - int addrNotFound; - int nConstraint = pLoop->nLTerm; +/* +** Allocate and populate an sqlite3_index_info structure. It is the +** responsibility of the caller to eventually release the structure +** by passing the pointer returned by this function to sqlite3_free(). +*/ +static sqlite3_index_info *allocateIndexInfo( + Parse *pParse, + WhereClause *pWC, + Bitmask mUnusable, /* Ignore terms with these prereqs */ + struct SrcList_item *pSrc, + ExprList *pOrderBy +){ + int i, j; + int nTerm; + struct sqlite3_index_constraint *pIdxCons; + struct sqlite3_index_orderby *pIdxOrderBy; + struct sqlite3_index_constraint_usage *pUsage; + WhereTerm *pTerm; + int nOrderBy; + sqlite3_index_info *pIdxInfo; - sqlite3ExprCachePush(pParse); - iReg = sqlite3GetTempRange(pParse, nConstraint+2); - addrNotFound = pLevel->addrBrk; - for(j=0; j<nConstraint; j++){ - int iTarget = iReg+j+2; - pTerm = pLoop->aLTerm[j]; - if( pTerm==0 ) continue; - if( pTerm->eOperator & WO_IN ){ - codeEqualityTerm(pParse, pTerm, pLevel, j, bRev, iTarget); - addrNotFound = pLevel->addrNxt; - }else{ - sqlite3ExprCode(pParse, pTerm->pExpr->pRight, iTarget); - } + /* Count the number of possible WHERE clause constraints referring + ** to this virtual table */ + for(i=nTerm=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ + if( pTerm->leftCursor != pSrc->iCursor ) continue; + if( pTerm->prereqRight & mUnusable ) continue; + assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); + testcase( pTerm->eOperator & WO_IN ); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_IS ); + testcase( pTerm->eOperator & WO_ALL ); + if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; + nTerm++; + } + + /* If the ORDER BY clause contains only columns in the current + ** virtual table then allocate space for the aOrderBy part of + ** the sqlite3_index_info structure. + */ + nOrderBy = 0; + if( pOrderBy ){ + int n = pOrderBy->nExpr; + for(i=0; i<n; i++){ + Expr *pExpr = pOrderBy->a[i].pExpr; + if( pExpr->op!=TK_COLUMN || pExpr->iTable!=pSrc->iCursor ) break; } - sqlite3VdbeAddOp2(v, OP_Integer, pLoop->u.vtab.idxNum, iReg); - sqlite3VdbeAddOp2(v, OP_Integer, nConstraint, iReg+1); - sqlite3VdbeAddOp4(v, OP_VFilter, iCur, addrNotFound, iReg, - pLoop->u.vtab.idxStr, - pLoop->u.vtab.needFree ? P4_MPRINTF : P4_STATIC); - VdbeCoverage(v); - pLoop->u.vtab.needFree = 0; - for(j=0; j<nConstraint && j<16; j++){ - if( (pLoop->u.vtab.omitMask>>j)&1 ){ - disableTerm(pLevel, pLoop->aLTerm[j]); - } + if( i==n){ + nOrderBy = n; } - pLevel->op = OP_VNext; - pLevel->p1 = iCur; - pLevel->p2 = sqlite3VdbeCurrentAddr(v); - sqlite3ReleaseTempRange(pParse, iReg, nConstraint+2); - sqlite3ExprCachePop(pParse); - }else -#endif /* SQLITE_OMIT_VIRTUALTABLE */ + } - if( (pLoop->wsFlags & WHERE_IPK)!=0 - && (pLoop->wsFlags & (WHERE_COLUMN_IN|WHERE_COLUMN_EQ))!=0 - ){ - /* Case 2: We can directly reference a single row using an - ** equality comparison against the ROWID field. Or - ** we reference multiple rows using a "rowid IN (...)" - ** construct. - */ - assert( pLoop->u.btree.nEq==1 ); - pTerm = pLoop->aLTerm[0]; - assert( pTerm!=0 ); - assert( pTerm->pExpr!=0 ); - assert( omitTable==0 ); - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - iReleaseReg = ++pParse->nMem; - iRowidReg = codeEqualityTerm(pParse, pTerm, pLevel, 0, bRev, iReleaseReg); - if( iRowidReg!=iReleaseReg ) sqlite3ReleaseTempReg(pParse, iReleaseReg); - addrNxt = pLevel->addrNxt; - sqlite3VdbeAddOp2(v, OP_MustBeInt, iRowidReg, addrNxt); VdbeCoverage(v); - sqlite3VdbeAddOp3(v, OP_NotExists, iCur, addrNxt, iRowidReg); - VdbeCoverage(v); - sqlite3ExprCacheAffinityChange(pParse, iRowidReg, 1); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - VdbeComment((v, "pk")); - pLevel->op = OP_Noop; - }else if( (pLoop->wsFlags & WHERE_IPK)!=0 - && (pLoop->wsFlags & WHERE_COLUMN_RANGE)!=0 - ){ - /* Case 3: We have an inequality comparison against the ROWID field. - */ - int testOp = OP_Noop; - int start; - int memEndValue = 0; - WhereTerm *pStart, *pEnd; + /* Allocate the sqlite3_index_info structure + */ + pIdxInfo = sqlite3DbMallocZero(pParse->db, sizeof(*pIdxInfo) + + (sizeof(*pIdxCons) + sizeof(*pUsage))*nTerm + + sizeof(*pIdxOrderBy)*nOrderBy ); + if( pIdxInfo==0 ){ + sqlite3ErrorMsg(pParse, "out of memory"); + return 0; + } - assert( omitTable==0 ); - j = 0; - pStart = pEnd = 0; - if( pLoop->wsFlags & WHERE_BTM_LIMIT ) pStart = pLoop->aLTerm[j++]; - if( pLoop->wsFlags & WHERE_TOP_LIMIT ) pEnd = pLoop->aLTerm[j++]; - assert( pStart!=0 || pEnd!=0 ); - if( bRev ){ - pTerm = pStart; - pStart = pEnd; - pEnd = pTerm; - } - if( pStart ){ - Expr *pX; /* The expression that defines the start bound */ - int r1, rTemp; /* Registers for holding the start boundary */ + /* Initialize the structure. The sqlite3_index_info structure contains + ** many fields that are declared "const" to prevent xBestIndex from + ** changing them. We have to do some funky casting in order to + ** initialize those fields. + */ + pIdxCons = (struct sqlite3_index_constraint*)&pIdxInfo[1]; + pIdxOrderBy = (struct sqlite3_index_orderby*)&pIdxCons[nTerm]; + pUsage = (struct sqlite3_index_constraint_usage*)&pIdxOrderBy[nOrderBy]; + *(int*)&pIdxInfo->nConstraint = nTerm; + *(int*)&pIdxInfo->nOrderBy = nOrderBy; + *(struct sqlite3_index_constraint**)&pIdxInfo->aConstraint = pIdxCons; + *(struct sqlite3_index_orderby**)&pIdxInfo->aOrderBy = pIdxOrderBy; + *(struct sqlite3_index_constraint_usage**)&pIdxInfo->aConstraintUsage = + pUsage; - /* The following constant maps TK_xx codes into corresponding - ** seek opcodes. It depends on a particular ordering of TK_xx - */ - const u8 aMoveOp[] = { - /* TK_GT */ OP_SeekGT, - /* TK_LE */ OP_SeekLE, - /* TK_LT */ OP_SeekLT, - /* TK_GE */ OP_SeekGE - }; - assert( TK_LE==TK_GT+1 ); /* Make sure the ordering.. */ - assert( TK_LT==TK_GT+2 ); /* ... of the TK_xx values... */ - assert( TK_GE==TK_GT+3 ); /* ... is correcct. */ + for(i=j=0, pTerm=pWC->a; i<pWC->nTerm; i++, pTerm++){ + u8 op; + if( pTerm->leftCursor != pSrc->iCursor ) continue; + if( pTerm->prereqRight & mUnusable ) continue; + assert( IsPowerOfTwo(pTerm->eOperator & ~WO_EQUIV) ); + testcase( pTerm->eOperator & WO_IN ); + testcase( pTerm->eOperator & WO_IS ); + testcase( pTerm->eOperator & WO_ISNULL ); + testcase( pTerm->eOperator & WO_ALL ); + if( (pTerm->eOperator & ~(WO_ISNULL|WO_EQUIV|WO_IS))==0 ) continue; + if( pTerm->wtFlags & TERM_VNULL ) continue; + pIdxCons[j].iColumn = pTerm->u.leftColumn; + pIdxCons[j].iTermOffset = i; + op = (u8)pTerm->eOperator & WO_ALL; + if( op==WO_IN ) op = WO_EQ; + pIdxCons[j].op = op; + /* The direct assignment in the previous line is possible only because + ** the WO_ and SQLITE_INDEX_CONSTRAINT_ codes are identical. The + ** following asserts verify this fact. */ + assert( WO_EQ==SQLITE_INDEX_CONSTRAINT_EQ ); + assert( WO_LT==SQLITE_INDEX_CONSTRAINT_LT ); + assert( WO_LE==SQLITE_INDEX_CONSTRAINT_LE ); + assert( WO_GT==SQLITE_INDEX_CONSTRAINT_GT ); + assert( WO_GE==SQLITE_INDEX_CONSTRAINT_GE ); + assert( WO_MATCH==SQLITE_INDEX_CONSTRAINT_MATCH ); + assert( pTerm->eOperator & (WO_IN|WO_EQ|WO_LT|WO_LE|WO_GT|WO_GE|WO_MATCH) ); + j++; + } + for(i=0; i<nOrderBy; i++){ + Expr *pExpr = pOrderBy->a[i].pExpr; + pIdxOrderBy[i].iColumn = pExpr->iColumn; + pIdxOrderBy[i].desc = pOrderBy->a[i].sortOrder; + } - assert( (pStart->wtFlags & TERM_VNULL)==0 ); - testcase( pStart->wtFlags & TERM_VIRTUAL ); - pX = pStart->pExpr; - assert( pX!=0 ); - testcase( pStart->leftCursor!=iCur ); /* transitive constraints */ - r1 = sqlite3ExprCodeTemp(pParse, pX->pRight, &rTemp); - sqlite3VdbeAddOp3(v, aMoveOp[pX->op-TK_GT], iCur, addrBrk, r1); - VdbeComment((v, "pk")); - VdbeCoverageIf(v, pX->op==TK_GT); - VdbeCoverageIf(v, pX->op==TK_LE); - VdbeCoverageIf(v, pX->op==TK_LT); - VdbeCoverageIf(v, pX->op==TK_GE); - sqlite3ExprCacheAffinityChange(pParse, r1, 1); - sqlite3ReleaseTempReg(pParse, rTemp); - disableTerm(pLevel, pStart); - }else{ - sqlite3VdbeAddOp2(v, bRev ? OP_Last : OP_Rewind, iCur, addrBrk); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - } - if( pEnd ){ - Expr *pX; - pX = pEnd->pExpr; - assert( pX!=0 ); - assert( (pEnd->wtFlags & TERM_VNULL)==0 ); - testcase( pEnd->leftCursor!=iCur ); /* Transitive constraints */ - testcase( pEnd->wtFlags & TERM_VIRTUAL ); - memEndValue = ++pParse->nMem; - sqlite3ExprCode(pParse, pX->pRight, memEndValue); - if( pX->op==TK_LT || pX->op==TK_GT ){ - testOp = bRev ? OP_Le : OP_Ge; - }else{ - testOp = bRev ? OP_Lt : OP_Gt; - } - disableTerm(pLevel, pEnd); - } - start = sqlite3VdbeCurrentAddr(v); - pLevel->op = bRev ? OP_Prev : OP_Next; - pLevel->p1 = iCur; - pLevel->p2 = start; - assert( pLevel->p5==0 ); - if( testOp!=OP_Noop ){ - iRowidReg = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Rowid, iCur, iRowidReg); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - sqlite3VdbeAddOp3(v, testOp, memEndValue, addrBrk, iRowidReg); - VdbeCoverageIf(v, testOp==OP_Le); - VdbeCoverageIf(v, testOp==OP_Lt); - VdbeCoverageIf(v, testOp==OP_Ge); - VdbeCoverageIf(v, testOp==OP_Gt); - sqlite3VdbeChangeP5(v, SQLITE_AFF_NUMERIC | SQLITE_JUMPIFNULL); - } - }else if( pLoop->wsFlags & WHERE_INDEXED ){ - /* Case 4: A scan using an index. - ** - ** The WHERE clause may contain zero or more equality - ** terms ("==" or "IN" operators) that refer to the N - ** left-most columns of the index. It may also contain - ** inequality constraints (>, <, >= or <=) on the indexed - ** column that immediately follows the N equalities. Only - ** the right-most column can be an inequality - the rest must - ** use the "==" and "IN" operators. For example, if the - ** index is on (x,y,z), then the following clauses are all - ** optimized: - ** - ** x=5 - ** x=5 AND y=10 - ** x=5 AND y<10 - ** x=5 AND y>5 AND y<10 - ** x=5 AND y=5 AND z<=10 - ** - ** The z<10 term of the following cannot be used, only - ** the x=5 term: - ** - ** x=5 AND z<10 - ** - ** N may be zero if there are inequality constraints. - ** If there are no inequality constraints, then N is at - ** least one. - ** - ** This case is also used when there are no WHERE clause - ** constraints but an index is selected anyway, in order - ** to force the output order to conform to an ORDER BY. - */ - static const u8 aStartOp[] = { - 0, - 0, - OP_Rewind, /* 2: (!start_constraints && startEq && !bRev) */ - OP_Last, /* 3: (!start_constraints && startEq && bRev) */ - OP_SeekGT, /* 4: (start_constraints && !startEq && !bRev) */ - OP_SeekLT, /* 5: (start_constraints && !startEq && bRev) */ - OP_SeekGE, /* 6: (start_constraints && startEq && !bRev) */ - OP_SeekLE /* 7: (start_constraints && startEq && bRev) */ - }; - static const u8 aEndOp[] = { - OP_IdxGE, /* 0: (end_constraints && !bRev && !endEq) */ - OP_IdxGT, /* 1: (end_constraints && !bRev && endEq) */ - OP_IdxLE, /* 2: (end_constraints && bRev && !endEq) */ - OP_IdxLT, /* 3: (end_constraints && bRev && endEq) */ - }; - u16 nEq = pLoop->u.btree.nEq; /* Number of == or IN terms */ - int regBase; /* Base register holding constraint values */ - WhereTerm *pRangeStart = 0; /* Inequality constraint at range start */ - WhereTerm *pRangeEnd = 0; /* Inequality constraint at range end */ - int startEq; /* True if range start uses ==, >= or <= */ - int endEq; /* True if range end uses ==, >= or <= */ - int start_constraints; /* Start of range is constrained */ - int nConstraint; /* Number of constraint terms */ - Index *pIdx; /* The index we will be using */ - int iIdxCur; /* The VDBE cursor for the index */ - int nExtraReg = 0; /* Number of extra registers needed */ - int op; /* Instruction opcode */ - char *zStartAff; /* Affinity for start of range constraint */ - char cEndAff = 0; /* Affinity for end of range constraint */ - u8 bSeekPastNull = 0; /* True to seek past initial nulls */ - u8 bStopAtNull = 0; /* Add condition to terminate at NULLs */ + return pIdxInfo; +} - pIdx = pLoop->u.btree.pIndex; - iIdxCur = pLevel->iIdxCur; - assert( nEq>=pLoop->nSkip ); +/* +** The table object reference passed as the second argument to this function +** must represent a virtual table. This function invokes the xBestIndex() +** method of the virtual table with the sqlite3_index_info object that +** comes in as the 3rd argument to this function. +** +** If an error occurs, pParse is populated with an error message and a +** non-zero value is returned. Otherwise, 0 is returned and the output +** part of the sqlite3_index_info structure is left populated. +** +** Whether or not an error is returned, it is the responsibility of the +** caller to eventually free p->idxStr if p->needToFreeIdxStr indicates +** that this is required. +*/ +static int vtabBestIndex(Parse *pParse, Table *pTab, sqlite3_index_info *p){ + sqlite3_vtab *pVtab = sqlite3GetVTable(pParse->db, pTab)->pVtab; + int i; + int rc; - /* If this loop satisfies a sort order (pOrderBy) request that - ** was passed to this function to implement a "SELECT min(x) ..." - ** query, then the caller will only allow the loop to run for - ** a single iteration. This means that the first row returned - ** should not have a NULL value stored in 'x'. If column 'x' is - ** the first one after the nEq equality constraints in the index, - ** this requires some special handling. - */ - assert( pWInfo->pOrderBy==0 - || pWInfo->pOrderBy->nExpr==1 - || (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)==0 ); - if( (pWInfo->wctrlFlags&WHERE_ORDERBY_MIN)!=0 - && pWInfo->nOBSat>0 - && (pIdx->nKeyCol>nEq) - ){ - assert( pLoop->nSkip==0 ); - bSeekPastNull = 1; - nExtraReg = 1; - } + TRACE_IDX_INPUTS(p); + rc = pVtab->pModule->xBestIndex(pVtab, p); + TRACE_IDX_OUTPUTS(p); - /* Find any inequality constraint terms for the start and end - ** of the range. - */ - j = nEq; - if( pLoop->wsFlags & WHERE_BTM_LIMIT ){ - pRangeStart = pLoop->aLTerm[j++]; - nExtraReg = 1; - /* Like optimization range constraints always occur in pairs */ - assert( (pRangeStart->wtFlags & TERM_LIKEOPT)==0 || - (pLoop->wsFlags & WHERE_TOP_LIMIT)!=0 ); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_NOMEM ){ + pParse->db->mallocFailed = 1; + }else if( !pVtab->zErrMsg ){ + sqlite3ErrorMsg(pParse, "%s", sqlite3ErrStr(rc)); + }else{ + sqlite3ErrorMsg(pParse, "%s", pVtab->zErrMsg); } - if( pLoop->wsFlags & WHERE_TOP_LIMIT ){ - pRangeEnd = pLoop->aLTerm[j++]; - nExtraReg = 1; - if( (pRangeEnd->wtFlags & TERM_LIKEOPT)!=0 ){ - assert( pRangeStart!=0 ); /* LIKE opt constraints */ - assert( pRangeStart->wtFlags & TERM_LIKEOPT ); /* occur in pairs */ - pLevel->iLikeRepCntr = ++pParse->nMem; - testcase( bRev ); - testcase( pIdx->aSortOrder[nEq]==SQLITE_SO_DESC ); - sqlite3VdbeAddOp2(v, OP_Integer, - bRev ^ (pIdx->aSortOrder[nEq]==SQLITE_SO_DESC), - pLevel->iLikeRepCntr); - VdbeComment((v, "LIKE loop counter")); - pLevel->addrLikeRep = sqlite3VdbeCurrentAddr(v); - } - if( pRangeStart==0 - && (j = pIdx->aiColumn[nEq])>=0 - && pIdx->pTable->aCol[j].notNull==0 - ){ - bSeekPastNull = 1; - } + } + sqlite3_free(pVtab->zErrMsg); + pVtab->zErrMsg = 0; + + for(i=0; i<p->nConstraint; i++){ + if( !p->aConstraint[i].usable && p->aConstraintUsage[i].argvIndex>0 ){ + sqlite3ErrorMsg(pParse, + "table %s: xBestIndex returned an invalid plan", pTab->zName); } - assert( pRangeEnd==0 || (pRangeEnd->wtFlags & TERM_VNULL)==0 ); + } - /* Generate code to evaluate all constraint terms using == or IN - ** and store the values of those terms in an array of registers - ** starting at regBase. - */ - regBase = codeAllEqualityTerms(pParse,pLevel,bRev,nExtraReg,&zStartAff); - assert( zStartAff==0 || sqlite3Strlen30(zStartAff)>=nEq ); - if( zStartAff ) cEndAff = zStartAff[nEq]; - addrNxt = pLevel->addrNxt; + return pParse->nErr; +} +#endif /* !defined(SQLITE_OMIT_VIRTUALTABLE) */ - /* If we are doing a reverse order scan on an ascending index, or - ** a forward order scan on a descending index, interchange the - ** start and end terms (pRangeStart and pRangeEnd). - */ - if( (nEq<pIdx->nKeyCol && bRev==(pIdx->aSortOrder[nEq]==SQLITE_SO_ASC)) - || (bRev && pIdx->nKeyCol==nEq) - ){ - SWAP(WhereTerm *, pRangeEnd, pRangeStart); - SWAP(u8, bSeekPastNull, bStopAtNull); - } +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the location of a particular key among all keys in an +** index. Store the results in aStat as follows: +** +** aStat[0] Est. number of rows less than pRec +** aStat[1] Est. number of rows equal to pRec +** +** Return the index of the sample that is the smallest sample that +** is greater than or equal to pRec. Note that this index is not an index +** into the aSample[] array - it is an index into a virtual set of samples +** based on the contents of aSample[] and the number of fields in record +** pRec. +*/ +static int whereKeyStats( + Parse *pParse, /* Database connection */ + Index *pIdx, /* Index to consider domain of */ + UnpackedRecord *pRec, /* Vector of values to consider */ + int roundUp, /* Round up if true. Round down if false */ + tRowcnt *aStat /* OUT: stats written here */ +){ + IndexSample *aSample = pIdx->aSample; + int iCol; /* Index of required stats in anEq[] etc. */ + int i; /* Index of first sample >= pRec */ + int iSample; /* Smallest sample larger than or equal to pRec */ + int iMin = 0; /* Smallest sample not yet tested */ + int iTest; /* Next sample to test */ + int res; /* Result of comparison operation */ + int nField; /* Number of fields in pRec */ + tRowcnt iLower = 0; /* anLt[] + anEq[] of largest sample pRec is > */ - testcase( pRangeStart && (pRangeStart->eOperator & WO_LE)!=0 ); - testcase( pRangeStart && (pRangeStart->eOperator & WO_GE)!=0 ); - testcase( pRangeEnd && (pRangeEnd->eOperator & WO_LE)!=0 ); - testcase( pRangeEnd && (pRangeEnd->eOperator & WO_GE)!=0 ); - startEq = !pRangeStart || pRangeStart->eOperator & (WO_LE|WO_GE); - endEq = !pRangeEnd || pRangeEnd->eOperator & (WO_LE|WO_GE); - start_constraints = pRangeStart || nEq>0; +#ifndef SQLITE_DEBUG + UNUSED_PARAMETER( pParse ); +#endif + assert( pRec!=0 ); + assert( pIdx->nSample>0 ); + assert( pRec->nField>0 && pRec->nField<=pIdx->nSampleCol ); + + /* Do a binary search to find the first sample greater than or equal + ** to pRec. If pRec contains a single field, the set of samples to search + ** is simply the aSample[] array. If the samples in aSample[] contain more + ** than one fields, all fields following the first are ignored. + ** + ** If pRec contains N fields, where N is more than one, then as well as the + ** samples in aSample[] (truncated to N fields), the search also has to + ** consider prefixes of those samples. For example, if the set of samples + ** in aSample is: + ** + ** aSample[0] = (a, 5) + ** aSample[1] = (a, 10) + ** aSample[2] = (b, 5) + ** aSample[3] = (c, 100) + ** aSample[4] = (c, 105) + ** + ** Then the search space should ideally be the samples above and the + ** unique prefixes [a], [b] and [c]. But since that is hard to organize, + ** the code actually searches this set: + ** + ** 0: (a) + ** 1: (a, 5) + ** 2: (a, 10) + ** 3: (a, 10) + ** 4: (b) + ** 5: (b, 5) + ** 6: (c) + ** 7: (c, 100) + ** 8: (c, 105) + ** 9: (c, 105) + ** + ** For each sample in the aSample[] array, N samples are present in the + ** effective sample array. In the above, samples 0 and 1 are based on + ** sample aSample[0]. Samples 2 and 3 on aSample[1] etc. + ** + ** Often, sample i of each block of N effective samples has (i+1) fields. + ** Except, each sample may be extended to ensure that it is greater than or + ** equal to the previous sample in the array. For example, in the above, + ** sample 2 is the first sample of a block of N samples, so at first it + ** appears that it should be 1 field in size. However, that would make it + ** smaller than sample 1, so the binary search would not work. As a result, + ** it is extended to two fields. The duplicates that this creates do not + ** cause any problems. + */ + nField = pRec->nField; + iCol = 0; + iSample = pIdx->nSample * nField; + do{ + int iSamp; /* Index in aSample[] of test sample */ + int n; /* Number of fields in test sample */ - /* Seek the index cursor to the start of the range. */ - nConstraint = nEq; - if( pRangeStart ){ - Expr *pRight = pRangeStart->pExpr->pRight; - sqlite3ExprCode(pParse, pRight, regBase+nEq); - whereLikeOptimizationStringFixup(v, pLevel, pRangeStart); - if( (pRangeStart->wtFlags & TERM_VNULL)==0 - && sqlite3ExprCanBeNull(pRight) - ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); - VdbeCoverage(v); + iTest = (iMin+iSample)/2; + iSamp = iTest / nField; + if( iSamp>0 ){ + /* The proposed effective sample is a prefix of sample aSample[iSamp]. + ** Specifically, the shortest prefix of at least (1 + iTest%nField) + ** fields that is greater than the previous effective sample. */ + for(n=(iTest % nField) + 1; n<nField; n++){ + if( aSample[iSamp-1].anLt[n-1]!=aSample[iSamp].anLt[n-1] ) break; } - if( zStartAff ){ - if( sqlite3CompareAffinity(pRight, zStartAff[nEq])==SQLITE_AFF_NONE){ - /* Since the comparison is to be performed with no conversions - ** applied to the operands, set the affinity to apply to pRight to - ** SQLITE_AFF_NONE. */ - zStartAff[nEq] = SQLITE_AFF_NONE; - } - if( sqlite3ExprNeedsNoAffinityChange(pRight, zStartAff[nEq]) ){ - zStartAff[nEq] = SQLITE_AFF_NONE; - } - } - nConstraint++; - testcase( pRangeStart->wtFlags & TERM_VIRTUAL ); - }else if( bSeekPastNull ){ - sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); - nConstraint++; - startEq = 0; - start_constraints = 1; + }else{ + n = iTest + 1; } - codeApplyAffinity(pParse, regBase, nConstraint - bSeekPastNull, zStartAff); - op = aStartOp[(start_constraints<<2) + (startEq<<1) + bRev]; - assert( op!=0 ); - sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); - VdbeCoverage(v); - VdbeCoverageIf(v, op==OP_Rewind); testcase( op==OP_Rewind ); - VdbeCoverageIf(v, op==OP_Last); testcase( op==OP_Last ); - VdbeCoverageIf(v, op==OP_SeekGT); testcase( op==OP_SeekGT ); - VdbeCoverageIf(v, op==OP_SeekGE); testcase( op==OP_SeekGE ); - VdbeCoverageIf(v, op==OP_SeekLE); testcase( op==OP_SeekLE ); - VdbeCoverageIf(v, op==OP_SeekLT); testcase( op==OP_SeekLT ); - /* Load the value for the inequality constraint at the end of the - ** range (if any). - */ - nConstraint = nEq; - if( pRangeEnd ){ - Expr *pRight = pRangeEnd->pExpr->pRight; - sqlite3ExprCacheRemove(pParse, regBase+nEq, 1); - sqlite3ExprCode(pParse, pRight, regBase+nEq); - whereLikeOptimizationStringFixup(v, pLevel, pRangeEnd); - if( (pRangeEnd->wtFlags & TERM_VNULL)==0 - && sqlite3ExprCanBeNull(pRight) - ){ - sqlite3VdbeAddOp2(v, OP_IsNull, regBase+nEq, addrNxt); - VdbeCoverage(v); - } - if( sqlite3CompareAffinity(pRight, cEndAff)!=SQLITE_AFF_NONE - && !sqlite3ExprNeedsNoAffinityChange(pRight, cEndAff) - ){ - codeApplyAffinity(pParse, regBase+nEq, 1, &cEndAff); - } - nConstraint++; - testcase( pRangeEnd->wtFlags & TERM_VIRTUAL ); - }else if( bStopAtNull ){ - sqlite3VdbeAddOp2(v, OP_Null, 0, regBase+nEq); - endEq = 0; - nConstraint++; + pRec->nField = n; + res = sqlite3VdbeRecordCompare(aSample[iSamp].n, aSample[iSamp].p, pRec); + if( res<0 ){ + iLower = aSample[iSamp].anLt[n-1] + aSample[iSamp].anEq[n-1]; + iMin = iTest+1; + }else if( res==0 && n<nField ){ + iLower = aSample[iSamp].anLt[n-1]; + iMin = iTest+1; + res = -1; + }else{ + iSample = iTest; + iCol = n-1; } - sqlite3DbFree(db, zStartAff); + }while( res && iMin<iSample ); + i = iSample / nField; - /* Top of the loop body */ - pLevel->p2 = sqlite3VdbeCurrentAddr(v); +#ifdef SQLITE_DEBUG + /* The following assert statements check that the binary search code + ** above found the right answer. This block serves no purpose other + ** than to invoke the asserts. */ + if( pParse->db->mallocFailed==0 ){ + if( res==0 ){ + /* If (res==0) is true, then pRec must be equal to sample i. */ + assert( i<pIdx->nSample ); + assert( iCol==nField-1 ); + pRec->nField = nField; + assert( 0==sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec) + || pParse->db->mallocFailed + ); + }else{ + /* Unless i==pIdx->nSample, indicating that pRec is larger than + ** all samples in the aSample[] array, pRec must be smaller than the + ** (iCol+1) field prefix of sample i. */ + assert( i<=pIdx->nSample && i>=0 ); + pRec->nField = iCol+1; + assert( i==pIdx->nSample + || sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)>0 + || pParse->db->mallocFailed ); - /* Check if the index cursor is past the end of the range. */ - if( nConstraint ){ - op = aEndOp[bRev*2 + endEq]; - sqlite3VdbeAddOp4Int(v, op, iIdxCur, addrNxt, regBase, nConstraint); - testcase( op==OP_IdxGT ); VdbeCoverageIf(v, op==OP_IdxGT ); - testcase( op==OP_IdxGE ); VdbeCoverageIf(v, op==OP_IdxGE ); - testcase( op==OP_IdxLT ); VdbeCoverageIf(v, op==OP_IdxLT ); - testcase( op==OP_IdxLE ); VdbeCoverageIf(v, op==OP_IdxLE ); + /* if i==0 and iCol==0, then record pRec is smaller than all samples + ** in the aSample[] array. Otherwise, if (iCol>0) then pRec must + ** be greater than or equal to the (iCol) field prefix of sample i. + ** If (i>0), then pRec must also be greater than sample (i-1). */ + if( iCol>0 ){ + pRec->nField = iCol; + assert( sqlite3VdbeRecordCompare(aSample[i].n, aSample[i].p, pRec)<=0 + || pParse->db->mallocFailed ); + } + if( i>0 ){ + pRec->nField = nField; + assert( sqlite3VdbeRecordCompare(aSample[i-1].n, aSample[i-1].p, pRec)<0 + || pParse->db->mallocFailed ); + } } + } +#endif /* ifdef SQLITE_DEBUG */ - /* Seek the table cursor, if required */ - disableTerm(pLevel, pRangeStart); - disableTerm(pLevel, pRangeEnd); - if( omitTable ){ - /* pIdx is a covering index. No need to access the main table. */ - }else if( HasRowid(pIdx->pTable) ){ - iRowidReg = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_IdxRowid, iIdxCur, iRowidReg); - sqlite3ExprCacheStore(pParse, iCur, -1, iRowidReg); - sqlite3VdbeAddOp2(v, OP_Seek, iCur, iRowidReg); /* Deferred seek */ - }else if( iCur!=iIdxCur ){ - Index *pPk = sqlite3PrimaryKeyIndex(pIdx->pTable); - iRowidReg = sqlite3GetTempRange(pParse, pPk->nKeyCol); - for(j=0; j<pPk->nKeyCol; j++){ - k = sqlite3ColumnOfIndex(pIdx, pPk->aiColumn[j]); - sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, k, iRowidReg+j); - } - sqlite3VdbeAddOp4Int(v, OP_NotFound, iCur, addrCont, - iRowidReg, pPk->nKeyCol); VdbeCoverage(v); + if( res==0 ){ + /* Record pRec is equal to sample i */ + assert( iCol==nField-1 ); + aStat[0] = aSample[i].anLt[iCol]; + aStat[1] = aSample[i].anEq[iCol]; + }else{ + /* At this point, the (iCol+1) field prefix of aSample[i] is the first + ** sample that is greater than pRec. Or, if i==pIdx->nSample then pRec + ** is larger than all samples in the array. */ + tRowcnt iUpper, iGap; + if( i>=pIdx->nSample ){ + iUpper = sqlite3LogEstToInt(pIdx->aiRowLogEst[0]); + }else{ + iUpper = aSample[i].anLt[iCol]; } - /* Record the instruction used to terminate the loop. Disable - ** WHERE clause terms made redundant by the index range scan. - */ - if( pLoop->wsFlags & WHERE_ONEROW ){ - pLevel->op = OP_Noop; - }else if( bRev ){ - pLevel->op = OP_Prev; + if( iLower>=iUpper ){ + iGap = 0; }else{ - pLevel->op = OP_Next; + iGap = iUpper - iLower; } - pLevel->p1 = iIdxCur; - pLevel->p3 = (pLoop->wsFlags&WHERE_UNQ_WANTED)!=0 ? 1:0; - if( (pLoop->wsFlags & WHERE_CONSTRAINT)==0 ){ - pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + if( roundUp ){ + iGap = (iGap*2)/3; }else{ - assert( pLevel->p5==0 ); + iGap = iGap/3; } - }else - -#ifndef SQLITE_OMIT_OR_OPTIMIZATION - if( pLoop->wsFlags & WHERE_MULTI_OR ){ - /* Case 5: Two or more separately indexed terms connected by OR - ** - ** Example: - ** - ** CREATE TABLE t1(a,b,c,d); - ** CREATE INDEX i1 ON t1(a); - ** CREATE INDEX i2 ON t1(b); - ** CREATE INDEX i3 ON t1(c); - ** - ** SELECT * FROM t1 WHERE a=5 OR b=7 OR (c=11 AND d=13) - ** - ** In the example, there are three indexed terms connected by OR. - ** The top of the loop looks like this: - ** - ** Null 1 # Zero the rowset in reg 1 - ** - ** Then, for each indexed term, the following. The arguments to - ** RowSetTest are such that the rowid of the current row is inserted - ** into the RowSet. If it is already present, control skips the - ** Gosub opcode and jumps straight to the code generated by WhereEnd(). - ** - ** sqlite3WhereBegin(<term>) - ** RowSetTest # Insert rowid into rowset - ** Gosub 2 A - ** sqlite3WhereEnd() - ** - ** Following the above, code to terminate the loop. Label A, the target - ** of the Gosub above, jumps to the instruction right after the Goto. - ** - ** Null 1 # Zero the rowset in reg 1 - ** Goto B # The loop is finished. - ** - ** A: <loop body> # Return data, whatever. - ** - ** Return 2 # Jump back to the Gosub - ** - ** B: <after the loop> - ** - ** Added 2014-05-26: If the table is a WITHOUT ROWID table, then - ** use an ephemeral index instead of a RowSet to record the primary - ** keys of the rows we have already seen. - ** - */ - WhereClause *pOrWc; /* The OR-clause broken out into subterms */ - SrcList *pOrTab; /* Shortened table list or OR-clause generation */ - Index *pCov = 0; /* Potential covering index (or NULL) */ - int iCovCur = pParse->nTab++; /* Cursor used for index scans (if any) */ + aStat[0] = iLower + iGap; + aStat[1] = pIdx->aAvgEq[iCol]; + } - int regReturn = ++pParse->nMem; /* Register used with OP_Gosub */ - int regRowset = 0; /* Register for RowSet object */ - int regRowid = 0; /* Register holding rowid */ - int iLoopBody = sqlite3VdbeMakeLabel(v); /* Start of loop body */ - int iRetInit; /* Address of regReturn init */ - int untestedTerms = 0; /* Some terms not completely tested */ - int ii; /* Loop counter */ - u16 wctrlFlags; /* Flags for sub-WHERE clause */ - Expr *pAndExpr = 0; /* An ".. AND (...)" expression */ - Table *pTab = pTabItem->pTab; - - pTerm = pLoop->aLTerm[0]; - assert( pTerm!=0 ); - assert( pTerm->eOperator & WO_OR ); - assert( (pTerm->wtFlags & TERM_ORINFO)!=0 ); - pOrWc = &pTerm->u.pOrInfo->wc; - pLevel->op = OP_Return; - pLevel->p1 = regReturn; + /* Restore the pRec->nField value before returning. */ + pRec->nField = nField; + return i; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - /* Set up a new SrcList in pOrTab containing the table being scanned - ** by this loop in the a[0] slot and all notReady tables in a[1..] slots. - ** This becomes the SrcList in the recursive call to sqlite3WhereBegin(). - */ - if( pWInfo->nLevel>1 ){ - int nNotReady; /* The number of notReady tables */ - struct SrcList_item *origSrc; /* Original list of tables */ - nNotReady = pWInfo->nLevel - iLevel - 1; - pOrTab = sqlite3StackAllocRaw(db, - sizeof(*pOrTab)+ nNotReady*sizeof(pOrTab->a[0])); - if( pOrTab==0 ) return notReady; - pOrTab->nAlloc = (u8)(nNotReady + 1); - pOrTab->nSrc = pOrTab->nAlloc; - memcpy(pOrTab->a, pTabItem, sizeof(*pTabItem)); - origSrc = pWInfo->pTabList->a; - for(k=1; k<=nNotReady; k++){ - memcpy(&pOrTab->a[k], &origSrc[pLevel[k].iFrom], sizeof(pOrTab->a[k])); - } - }else{ - pOrTab = pWInfo->pTabList; +/* +** If it is not NULL, pTerm is a term that provides an upper or lower +** bound on a range scan. Without considering pTerm, it is estimated +** that the scan will visit nNew rows. This function returns the number +** estimated to be visited after taking pTerm into account. +** +** If the user explicitly specified a likelihood() value for this term, +** then the return value is the likelihood multiplied by the number of +** input rows. Otherwise, this function assumes that an "IS NOT NULL" term +** has a likelihood of 0.50, and any other term a likelihood of 0.25. +*/ +static LogEst whereRangeAdjust(WhereTerm *pTerm, LogEst nNew){ + LogEst nRet = nNew; + if( pTerm ){ + if( pTerm->truthProb<=0 ){ + nRet += pTerm->truthProb; + }else if( (pTerm->wtFlags & TERM_VNULL)==0 ){ + nRet -= 20; assert( 20==sqlite3LogEst(4) ); } + } + return nRet; +} - /* Initialize the rowset register to contain NULL. An SQL NULL is - ** equivalent to an empty rowset. Or, create an ephemeral index - ** capable of holding primary keys in the case of a WITHOUT ROWID. - ** - ** Also initialize regReturn to contain the address of the instruction - ** immediately following the OP_Return at the bottom of the loop. This - ** is required in a few obscure LEFT JOIN cases where control jumps - ** over the top of the loop into the body of it. In this case the - ** correct response for the end-of-loop code (the OP_Return) is to - ** fall through to the next instruction, just as an OP_Next does if - ** called on an uninitialized cursor. - */ - if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ - if( HasRowid(pTab) ){ - regRowset = ++pParse->nMem; - sqlite3VdbeAddOp2(v, OP_Null, 0, regRowset); - }else{ - Index *pPk = sqlite3PrimaryKeyIndex(pTab); - regRowset = pParse->nTab++; - sqlite3VdbeAddOp2(v, OP_OpenEphemeral, regRowset, pPk->nKeyCol); - sqlite3VdbeSetP4KeyInfo(pParse, pPk); - } - regRowid = ++pParse->nMem; - } - iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn); +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** This function is called to estimate the number of rows visited by a +** range-scan on a skip-scan index. For example: +** +** CREATE INDEX i1 ON t1(a, b, c); +** SELECT * FROM t1 WHERE a=? AND c BETWEEN ? AND ?; +** +** Value pLoop->nOut is currently set to the estimated number of rows +** visited for scanning (a=? AND b=?). This function reduces that estimate +** by some factor to account for the (c BETWEEN ? AND ?) expression based +** on the stat4 data for the index. this scan will be peformed multiple +** times (once for each (a,b) combination that matches a=?) is dealt with +** by the caller. +** +** It does this by scanning through all stat4 samples, comparing values +** extracted from pLower and pUpper with the corresponding column in each +** sample. If L and U are the number of samples found to be less than or +** equal to the values extracted from pLower and pUpper respectively, and +** N is the total number of samples, the pLoop->nOut value is adjusted +** as follows: +** +** nOut = nOut * ( min(U - L, 1) / N ) +** +** If pLower is NULL, or a value cannot be extracted from the term, L is +** set to zero. If pUpper is NULL, or a value cannot be extracted from it, +** U is set to N. +** +** Normally, this function sets *pbDone to 1 before returning. However, +** if no value can be extracted from either pLower or pUpper (and so the +** estimate of the number of rows delivered remains unchanged), *pbDone +** is left as is. +** +** If an error occurs, an SQLite error code is returned. Otherwise, +** SQLITE_OK. +*/ +static int whereRangeSkipScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop, /* Update the .nOut value of this loop */ + int *pbDone /* Set to true if at least one expr. value extracted */ +){ + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; + sqlite3 *db = pParse->db; + int nLower = -1; + int nUpper = p->nSample+1; + int rc = SQLITE_OK; + int iCol = p->aiColumn[nEq]; + u8 aff = iCol>=0 ? p->pTable->aCol[iCol].affinity : SQLITE_AFF_INTEGER; + CollSeq *pColl; + + sqlite3_value *p1 = 0; /* Value extracted from pLower */ + sqlite3_value *p2 = 0; /* Value extracted from pUpper */ + sqlite3_value *pVal = 0; /* Value extracted from record */ - /* If the original WHERE clause is z of the form: (x1 OR x2 OR ...) AND y - ** Then for every term xN, evaluate as the subexpression: xN AND z - ** That way, terms in y that are factored into the disjunction will - ** be picked up by the recursive calls to sqlite3WhereBegin() below. - ** - ** Actually, each subexpression is converted to "xN AND w" where w is - ** the "interesting" terms of z - terms that did not originate in the - ** ON or USING clause of a LEFT JOIN, and terms that are usable as - ** indices. - ** - ** This optimization also only applies if the (x1 OR x2 OR ...) term - ** is not contained in the ON clause of a LEFT JOIN. - ** See ticket http://www.sqlite.org/src/info/f2369304e4 - */ - if( pWC->nTerm>1 ){ - int iTerm; - for(iTerm=0; iTerm<pWC->nTerm; iTerm++){ - Expr *pExpr = pWC->a[iTerm].pExpr; - if( &pWC->a[iTerm] == pTerm ) continue; - if( ExprHasProperty(pExpr, EP_FromJoin) ) continue; - if( (pWC->a[iTerm].wtFlags & TERM_VIRTUAL)!=0 ) continue; - if( (pWC->a[iTerm].eOperator & WO_ALL)==0 ) continue; - testcase( pWC->a[iTerm].wtFlags & TERM_ORINFO ); - pExpr = sqlite3ExprDup(db, pExpr, 0); - pAndExpr = sqlite3ExprAnd(db, pAndExpr, pExpr); + pColl = sqlite3LocateCollSeq(pParse, p->azColl[nEq]); + if( pLower ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pLower->pExpr->pRight, aff, &p1); + nLower = 0; + } + if( pUpper && rc==SQLITE_OK ){ + rc = sqlite3Stat4ValueFromExpr(pParse, pUpper->pExpr->pRight, aff, &p2); + nUpper = p2 ? 0 : p->nSample; + } + + if( p1 || p2 ){ + int i; + int nDiff; + for(i=0; rc==SQLITE_OK && i<p->nSample; i++){ + rc = sqlite3Stat4Column(db, p->aSample[i].p, p->aSample[i].n, nEq, &pVal); + if( rc==SQLITE_OK && p1 ){ + int res = sqlite3MemCompare(p1, pVal, pColl); + if( res>=0 ) nLower++; } - if( pAndExpr ){ - pAndExpr = sqlite3PExpr(pParse, TK_AND, 0, pAndExpr, 0); + if( rc==SQLITE_OK && p2 ){ + int res = sqlite3MemCompare(p2, pVal, pColl); + if( res>=0 ) nUpper++; } } + nDiff = (nUpper - nLower); + if( nDiff<=0 ) nDiff = 1; - /* Run a separate WHERE clause for each term of the OR clause. After - ** eliminating duplicates from other WHERE clauses, the action for each - ** sub-WHERE clause is to to invoke the main loop body as a subroutine. - */ - wctrlFlags = WHERE_OMIT_OPEN_CLOSE - | WHERE_FORCE_TABLE - | WHERE_ONETABLE_ONLY - | WHERE_NO_AUTOINDEX; - for(ii=0; ii<pOrWc->nTerm; ii++){ - WhereTerm *pOrTerm = &pOrWc->a[ii]; - if( pOrTerm->leftCursor==iCur || (pOrTerm->eOperator & WO_AND)!=0 ){ - WhereInfo *pSubWInfo; /* Info for single OR-term scan */ - Expr *pOrExpr = pOrTerm->pExpr; /* Current OR clause term */ - int j1 = 0; /* Address of jump operation */ - if( pAndExpr && !ExprHasProperty(pOrExpr, EP_FromJoin) ){ - pAndExpr->pLeft = pOrExpr; - pOrExpr = pAndExpr; - } - /* Loop through table entries that match term pOrTerm. */ - WHERETRACE(0xffff, ("Subplan for OR-clause:\n")); - pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0, - wctrlFlags, iCovCur); - assert( pSubWInfo || pParse->nErr || db->mallocFailed ); - if( pSubWInfo ){ - WhereLoop *pSubLoop; - int addrExplain = explainOneScan( - pParse, pOrTab, &pSubWInfo->a[0], iLevel, pLevel->iFrom, 0 - ); - addScanStatus(v, pOrTab, &pSubWInfo->a[0], addrExplain); + /* If there is both an upper and lower bound specified, and the + ** comparisons indicate that they are close together, use the fallback + ** method (assume that the scan visits 1/64 of the rows) for estimating + ** the number of rows visited. Otherwise, estimate the number of rows + ** using the method described in the header comment for this function. */ + if( nDiff!=1 || pUpper==0 || pLower==0 ){ + int nAdjust = (sqlite3LogEst(p->nSample) - sqlite3LogEst(nDiff)); + pLoop->nOut -= nAdjust; + *pbDone = 1; + WHERETRACE(0x10, ("range skip-scan regions: %u..%u adjust=%d est=%d\n", + nLower, nUpper, nAdjust*-1, pLoop->nOut)); + } - /* This is the sub-WHERE clause body. First skip over - ** duplicate rows from prior sub-WHERE clauses, and record the - ** rowid (or PRIMARY KEY) for the current row so that the same - ** row will be skipped in subsequent sub-WHERE clauses. - */ - if( (pWInfo->wctrlFlags & WHERE_DUPLICATES_OK)==0 ){ - int r; - int iSet = ((ii==pOrWc->nTerm-1)?-1:ii); - if( HasRowid(pTab) ){ - r = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, regRowid, 0); - j1 = sqlite3VdbeAddOp4Int(v, OP_RowSetTest, regRowset, 0, r,iSet); - VdbeCoverage(v); - }else{ - Index *pPk = sqlite3PrimaryKeyIndex(pTab); - int nPk = pPk->nKeyCol; - int iPk; + }else{ + assert( *pbDone==0 ); + } - /* Read the PK into an array of temp registers. */ - r = sqlite3GetTempRange(pParse, nPk); - for(iPk=0; iPk<nPk; iPk++){ - int iCol = pPk->aiColumn[iPk]; - sqlite3ExprCodeGetColumn(pParse, pTab, iCol, iCur, r+iPk, 0); - } + sqlite3ValueFree(p1); + sqlite3ValueFree(p2); + sqlite3ValueFree(pVal); - /* Check if the temp table already contains this key. If so, - ** the row has already been included in the result set and - ** can be ignored (by jumping past the Gosub below). Otherwise, - ** insert the key into the temp table and proceed with processing - ** the row. - ** - ** Use some of the same optimizations as OP_RowSetTest: If iSet - ** is zero, assume that the key cannot already be present in - ** the temp table. And if iSet is -1, assume that there is no - ** need to insert the key into the temp table, as it will never - ** be tested for. */ - if( iSet ){ - j1 = sqlite3VdbeAddOp4Int(v, OP_Found, regRowset, 0, r, nPk); - VdbeCoverage(v); - } - if( iSet>=0 ){ - sqlite3VdbeAddOp3(v, OP_MakeRecord, r, nPk, regRowid); - sqlite3VdbeAddOp3(v, OP_IdxInsert, regRowset, regRowid, 0); - if( iSet ) sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT); - } + return rc; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ - /* Release the array of temp registers */ - sqlite3ReleaseTempRange(pParse, r, nPk); - } - } +/* +** This function is used to estimate the number of rows that will be visited +** by scanning an index for a range of values. The range may have an upper +** bound, a lower bound, or both. The WHERE clause terms that set the upper +** and lower bounds are represented by pLower and pUpper respectively. For +** example, assuming that index p is on t1(a): +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** |_____| |_____| +** | | +** pLower pUpper +** +** If either of the upper or lower bound is not present, then NULL is passed in +** place of the corresponding WhereTerm. +** +** The value in (pBuilder->pNew->u.btree.nEq) is the number of the index +** column subject to the range constraint. Or, equivalently, the number of +** equality constraints optimized by the proposed index scan. For example, +** assuming index p is on t1(a, b), and the SQL query is: +** +** ... FROM t1 WHERE a = ? AND b > ? AND b < ? ... +** +** then nEq is set to 1 (as the range restricted column, b, is the second +** left-most column of the index). Or, if the query is: +** +** ... FROM t1 WHERE a > ? AND a < ? ... +** +** then nEq is set to 0. +** +** When this function is called, *pnOut is set to the sqlite3LogEst() of the +** number of rows that the index scan is expected to visit without +** considering the range constraints. If nEq is 0, then *pnOut is the number of +** rows in the index. Assuming no error occurs, *pnOut is adjusted (reduced) +** to account for the range constraints pLower and pUpper. +** +** In the absence of sqlite_stat4 ANALYZE data, or if such data cannot be +** used, a single range inequality reduces the search space by a factor of 4. +** and a pair of constraints (x>? AND x<?) reduces the expected number of +** rows visited by a factor of 64. +*/ +static int whereRangeScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + WhereTerm *pLower, /* Lower bound on the range. ex: "x>123" Might be NULL */ + WhereTerm *pUpper, /* Upper bound on the range. ex: "x<455" Might be NULL */ + WhereLoop *pLoop /* Modify the .nOut and maybe .rRun fields */ +){ + int rc = SQLITE_OK; + int nOut = pLoop->nOut; + LogEst nNew; - /* Invoke the main loop body as a subroutine */ - sqlite3VdbeAddOp2(v, OP_Gosub, regReturn, iLoopBody); +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 + Index *p = pLoop->u.btree.pIndex; + int nEq = pLoop->u.btree.nEq; - /* Jump here (skipping the main loop body subroutine) if the - ** current sub-WHERE row is a duplicate from prior sub-WHEREs. */ - if( j1 ) sqlite3VdbeJumpHere(v, j1); + if( p->nSample>0 && nEq<p->nSampleCol ){ + if( nEq==pBuilder->nRecValid ){ + UnpackedRecord *pRec = pBuilder->pRec; + tRowcnt a[2]; + u8 aff; - /* The pSubWInfo->untestedTerms flag means that this OR term - ** contained one or more AND term from a notReady table. The - ** terms from the notReady table could not be tested and will - ** need to be tested later. - */ - if( pSubWInfo->untestedTerms ) untestedTerms = 1; + /* Variable iLower will be set to the estimate of the number of rows in + ** the index that are less than the lower bound of the range query. The + ** lower bound being the concatenation of $P and $L, where $P is the + ** key-prefix formed by the nEq values matched against the nEq left-most + ** columns of the index, and $L is the value in pLower. + ** + ** Or, if pLower is NULL or $L cannot be extracted from it (because it + ** is not a simple variable or literal value), the lower bound of the + ** range is $P. Due to a quirk in the way whereKeyStats() works, even + ** if $L is available, whereKeyStats() is called for both ($P) and + ** ($P:$L) and the larger of the two returned values is used. + ** + ** Similarly, iUpper is to be set to the estimate of the number of rows + ** less than the upper bound of the range query. Where the upper bound + ** is either ($P) or ($P:$U). Again, even if $U is available, both values + ** of iUpper are requested of whereKeyStats() and the smaller used. + ** + ** The number of rows between the two bounds is then just iUpper-iLower. + */ + tRowcnt iLower; /* Rows less than the lower bound */ + tRowcnt iUpper; /* Rows less than the upper bound */ + int iLwrIdx = -2; /* aSample[] for the lower bound */ + int iUprIdx = -1; /* aSample[] for the upper bound */ - /* If all of the OR-connected terms are optimized using the same - ** index, and the index is opened using the same cursor number - ** by each call to sqlite3WhereBegin() made by this loop, it may - ** be possible to use that index as a covering index. - ** - ** If the call to sqlite3WhereBegin() above resulted in a scan that - ** uses an index, and this is either the first OR-connected term - ** processed or the index is the same as that used by all previous - ** terms, set pCov to the candidate covering index. Otherwise, set - ** pCov to NULL to indicate that no candidate covering index will - ** be available. - */ - pSubLoop = pSubWInfo->a[0].pWLoop; - assert( (pSubLoop->wsFlags & WHERE_AUTO_INDEX)==0 ); - if( (pSubLoop->wsFlags & WHERE_INDEXED)!=0 - && (ii==0 || pSubLoop->u.btree.pIndex==pCov) - && (HasRowid(pTab) || !IsPrimaryKeyIndex(pSubLoop->u.btree.pIndex)) - ){ - assert( pSubWInfo->a[0].iIdxCur==iCovCur ); - pCov = pSubLoop->u.btree.pIndex; - wctrlFlags |= WHERE_REOPEN_IDX; - }else{ - pCov = 0; - } + if( pRec ){ + testcase( pRec->nField!=pBuilder->nRecValid ); + pRec->nField = pBuilder->nRecValid; + } + if( nEq==p->nKeyCol ){ + aff = SQLITE_AFF_INTEGER; + }else{ + aff = p->pTable->aCol[p->aiColumn[nEq]].affinity; + } + /* Determine iLower and iUpper using ($P) only. */ + if( nEq==0 ){ + iLower = 0; + iUpper = p->nRowEst0; + }else{ + /* Note: this call could be optimized away - since the same values must + ** have been requested when testing key $P in whereEqualScanEst(). */ + whereKeyStats(pParse, p, pRec, 0, a); + iLower = a[0]; + iUpper = a[0] + a[1]; + } - /* Finish the loop through table entries that match term pOrTerm. */ - sqlite3WhereEnd(pSubWInfo); + assert( pLower==0 || (pLower->eOperator & (WO_GT|WO_GE))!=0 ); + assert( pUpper==0 || (pUpper->eOperator & (WO_LT|WO_LE))!=0 ); + assert( p->aSortOrder!=0 ); + if( p->aSortOrder[nEq] ){ + /* The roles of pLower and pUpper are swapped for a DESC index */ + SWAP(WhereTerm*, pLower, pUpper); + } + + /* If possible, improve on the iLower estimate using ($P:$L). */ + if( pLower ){ + int bOk; /* True if value is extracted from pExpr */ + Expr *pExpr = pLower->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + iLwrIdx = whereKeyStats(pParse, p, pRec, 0, a); + iNew = a[0] + ((pLower->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); + if( iNew>iLower ) iLower = iNew; + nOut--; + pLower = 0; } } - } - pLevel->u.pCovidx = pCov; - if( pCov ) pLevel->iIdxCur = iCovCur; - if( pAndExpr ){ - pAndExpr->pLeft = 0; - sqlite3ExprDelete(db, pAndExpr); - } - sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v)); - sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk); - sqlite3VdbeResolveLabel(v, iLoopBody); - if( pWInfo->nLevel>1 ) sqlite3StackFree(db, pOrTab); - if( !untestedTerms ) disableTerm(pLevel, pTerm); - }else -#endif /* SQLITE_OMIT_OR_OPTIMIZATION */ + /* If possible, improve on the iUpper estimate using ($P:$U). */ + if( pUpper ){ + int bOk; /* True if value is extracted from pExpr */ + Expr *pExpr = pUpper->pExpr->pRight; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq, &bOk); + if( rc==SQLITE_OK && bOk ){ + tRowcnt iNew; + iUprIdx = whereKeyStats(pParse, p, pRec, 1, a); + iNew = a[0] + ((pUpper->eOperator & (WO_GT|WO_LE)) ? a[1] : 0); + if( iNew<iUpper ) iUpper = iNew; + nOut--; + pUpper = 0; + } + } - { - /* Case 6: There is no usable index. We must do a complete - ** scan of the entire table. - */ - static const u8 aStep[] = { OP_Next, OP_Prev }; - static const u8 aStart[] = { OP_Rewind, OP_Last }; - assert( bRev==0 || bRev==1 ); - if( pTabItem->isRecursive ){ - /* Tables marked isRecursive have only a single row that is stored in - ** a pseudo-cursor. No need to Rewind or Next such cursors. */ - pLevel->op = OP_Noop; + pBuilder->pRec = pRec; + if( rc==SQLITE_OK ){ + if( iUpper>iLower ){ + nNew = sqlite3LogEst(iUpper - iLower); + /* TUNING: If both iUpper and iLower are derived from the same + ** sample, then assume they are 4x more selective. This brings + ** the estimated selectivity more in line with what it would be + ** if estimated without the use of STAT3/4 tables. */ + if( iLwrIdx==iUprIdx ) nNew -= 20; assert( 20==sqlite3LogEst(4) ); + }else{ + nNew = 10; assert( 10==sqlite3LogEst(2) ); + } + if( nNew<nOut ){ + nOut = nNew; + } + WHERETRACE(0x10, ("STAT4 range scan: %u..%u est=%d\n", + (u32)iLower, (u32)iUpper, nOut)); + } }else{ - pLevel->op = aStep[bRev]; - pLevel->p1 = iCur; - pLevel->p2 = 1 + sqlite3VdbeAddOp2(v, aStart[bRev], iCur, addrBrk); - VdbeCoverageIf(v, bRev==0); - VdbeCoverageIf(v, bRev!=0); - pLevel->p5 = SQLITE_STMTSTATUS_FULLSCAN_STEP; + int bDone = 0; + rc = whereRangeSkipScanEst(pParse, pLower, pUpper, pLoop, &bDone); + if( bDone ) return rc; } } +#else + UNUSED_PARAMETER(pParse); + UNUSED_PARAMETER(pBuilder); + assert( pLower || pUpper ); +#endif + assert( pUpper==0 || (pUpper->wtFlags & TERM_VNULL)==0 ); + nNew = whereRangeAdjust(pLower, nOut); + nNew = whereRangeAdjust(pUpper, nNew); + + /* TUNING: If there is both an upper and lower limit and neither limit + ** has an application-defined likelihood(), assume the range is + ** reduced by an additional 75%. This means that, by default, an open-ended + ** range query (e.g. col > ?) is assumed to match 1/4 of the rows in the + ** index. While a closed range (e.g. col BETWEEN ? AND ?) is estimated to + ** match 1/64 of the index. */ + if( pLower && pLower->truthProb>0 && pUpper && pUpper->truthProb>0 ){ + nNew -= 20; + } -#ifdef SQLITE_ENABLE_STMT_SCANSTATUS - pLevel->addrVisit = sqlite3VdbeCurrentAddr(v); + nOut -= (pLower!=0) + (pUpper!=0); + if( nNew<10 ) nNew = 10; + if( nNew<nOut ) nOut = nNew; +#if defined(WHERETRACE_ENABLED) + if( pLoop->nOut>nOut ){ + WHERETRACE(0x10,("Range scan lowers nOut from %d to %d\n", + pLoop->nOut, nOut)); + } #endif + pLoop->nOut = (LogEst)nOut; + return rc; +} - /* Insert code to test every subexpression that can be completely - ** computed using the current set of tables. - */ - for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ - Expr *pE; - int skipLikeAddr = 0; - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - testcase( pTerm->wtFlags & TERM_CODED ); - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ - testcase( pWInfo->untestedTerms==0 - && (pWInfo->wctrlFlags & WHERE_ONETABLE_ONLY)!=0 ); - pWInfo->untestedTerms = 1; - continue; - } - pE = pTerm->pExpr; - assert( pE!=0 ); - if( pLevel->iLeftJoin && !ExprHasProperty(pE, EP_FromJoin) ){ - continue; - } - if( pTerm->wtFlags & TERM_LIKECOND ){ - assert( pLevel->iLikeRepCntr>0 ); - skipLikeAddr = sqlite3VdbeAddOp1(v, OP_IfNot, pLevel->iLikeRepCntr); - VdbeCoverage(v); - } - sqlite3ExprIfFalse(pParse, pE, addrCont, SQLITE_JUMPIFNULL); - if( skipLikeAddr ) sqlite3VdbeJumpHere(v, skipLikeAddr); - pTerm->wtFlags |= TERM_CODED; +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an equality constraint x=VALUE and where that VALUE occurs in +** the histogram data. This only works when x is the left-most +** column of an index and sqlite_stat3 histogram data is available +** for that index. When pExpr==NULL that means the constraint is +** "x IS NULL" instead of "x=VALUE". +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereEqualScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + Expr *pExpr, /* Expression for VALUE in the x=VALUE constraint */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + int nEq = pBuilder->pNew->u.btree.nEq; + UnpackedRecord *pRec = pBuilder->pRec; + u8 aff; /* Column affinity */ + int rc; /* Subfunction return code */ + tRowcnt a[2]; /* Statistics */ + int bOk; + + assert( nEq>=1 ); + assert( nEq<=p->nColumn ); + assert( p->aSample!=0 ); + assert( p->nSample>0 ); + assert( pBuilder->nRecValid<nEq ); + + /* If values are not available for all fields of the index to the left + ** of this one, no estimate can be made. Return SQLITE_NOTFOUND. */ + if( pBuilder->nRecValid<(nEq-1) ){ + return SQLITE_NOTFOUND; } - /* Insert code to test for implied constraints based on transitivity - ** of the "==" operator. - ** - ** Example: If the WHERE clause contains "t1.a=t2.b" and "t2.b=123" - ** and we are coding the t1 loop and the t2 loop has not yet coded, - ** then we cannot use the "t1.a=t2.b" constraint, but we can code - ** the implied "t1.a=123" constraint. - */ - for(pTerm=pWC->a, j=pWC->nTerm; j>0; j--, pTerm++){ - Expr *pE, *pEAlt; - WhereTerm *pAlt; - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( pTerm->eOperator!=(WO_EQUIV|WO_EQ) ) continue; - if( pTerm->leftCursor!=iCur ) continue; - if( pLevel->iLeftJoin ) continue; - pE = pTerm->pExpr; - assert( !ExprHasProperty(pE, EP_FromJoin) ); - assert( (pTerm->prereqRight & pLevel->notReady)!=0 ); - pAlt = findTerm(pWC, iCur, pTerm->u.leftColumn, notReady, WO_EQ|WO_IN, 0); - if( pAlt==0 ) continue; - if( pAlt->wtFlags & (TERM_CODED) ) continue; - testcase( pAlt->eOperator & WO_EQ ); - testcase( pAlt->eOperator & WO_IN ); - VdbeModuleComment((v, "begin transitive constraint")); - pEAlt = sqlite3StackAllocRaw(db, sizeof(*pEAlt)); - if( pEAlt ){ - *pEAlt = *pAlt->pExpr; - pEAlt->pLeft = pE->pLeft; - sqlite3ExprIfFalse(pParse, pEAlt, addrCont, SQLITE_JUMPIFNULL); - sqlite3StackFree(db, pEAlt); - } + /* This is an optimization only. The call to sqlite3Stat4ProbeSetValue() + ** below would return the same value. */ + if( nEq>=p->nColumn ){ + *pnRow = 1; + return SQLITE_OK; } - /* For a LEFT OUTER JOIN, generate code that will record the fact that - ** at least one row of the right table has matched the left table. - */ - if( pLevel->iLeftJoin ){ - pLevel->addrFirst = sqlite3VdbeCurrentAddr(v); - sqlite3VdbeAddOp2(v, OP_Integer, 1, pLevel->iLeftJoin); - VdbeComment((v, "record LEFT JOIN hit")); - sqlite3ExprCacheClear(pParse); - for(pTerm=pWC->a, j=0; j<pWC->nTerm; j++, pTerm++){ - testcase( pTerm->wtFlags & TERM_VIRTUAL ); - testcase( pTerm->wtFlags & TERM_CODED ); - if( pTerm->wtFlags & (TERM_VIRTUAL|TERM_CODED) ) continue; - if( (pTerm->prereqAll & pLevel->notReady)!=0 ){ - assert( pWInfo->untestedTerms ); - continue; - } - assert( pTerm->pExpr ); - sqlite3ExprIfFalse(pParse, pTerm->pExpr, addrCont, SQLITE_JUMPIFNULL); - pTerm->wtFlags |= TERM_CODED; - } + aff = p->pTable->aCol[p->aiColumn[nEq-1]].affinity; + rc = sqlite3Stat4ProbeSetValue(pParse, p, &pRec, pExpr, aff, nEq-1, &bOk); + pBuilder->pRec = pRec; + if( rc!=SQLITE_OK ) return rc; + if( bOk==0 ) return SQLITE_NOTFOUND; + pBuilder->nRecValid = nEq; + + whereKeyStats(pParse, p, pRec, 0, a); + WHERETRACE(0x10,("equality scan regions: %d\n", (int)a[1])); + *pnRow = a[1]; + + return rc; +} +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + +#ifdef SQLITE_ENABLE_STAT3_OR_STAT4 +/* +** Estimate the number of rows that will be returned based on +** an IN constraint where the right-hand side of the IN operator +** is a list of values. Example: +** +** WHERE x IN (1,2,3,4) +** +** Write the estimated row count into *pnRow and return SQLITE_OK. +** If unable to make an estimate, leave *pnRow unchanged and return +** non-zero. +** +** This routine can fail if it is unable to load a collating sequence +** required for string comparison, or if unable to allocate memory +** for a UTF conversion required for comparison. The error is stored +** in the pParse structure. +*/ +static int whereInScanEst( + Parse *pParse, /* Parsing & code generating context */ + WhereLoopBuilder *pBuilder, + ExprList *pList, /* The value list on the RHS of "x IN (v1,v2,v3,...)" */ + tRowcnt *pnRow /* Write the revised row estimate here */ +){ + Index *p = pBuilder->pNew->u.btree.pIndex; + i64 nRow0 = sqlite3LogEstToInt(p->aiRowLogEst[0]); + int nRecValid = pBuilder->nRecValid; + int rc = SQLITE_OK; /* Subfunction return code */ + tRowcnt nEst; /* Number of rows for a single term */ + tRowcnt nRowEst = 0; /* New estimate of the number of rows */ + int i; /* Loop counter */ + + assert( p->aSample!=0 ); + for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){ + nEst = nRow0; + rc = whereEqualScanEst(pParse, pBuilder, pList->a[i].pExpr, &nEst); + nRowEst += nEst; + pBuilder->nRecValid = nRecValid; } - return pLevel->notReady; + if( rc==SQLITE_OK ){ + if( nRowEst > nRow0 ) nRowEst = nRow0; + *pnRow = nRowEst; + WHERETRACE(0x10,("IN row estimate: est=%d\n", nRowEst)); + } + assert( pBuilder->nRecValid==nRecValid ); + return rc; } +#endif /* SQLITE_ENABLE_STAT3_OR_STAT4 */ + #ifdef WHERETRACE_ENABLED /* @@ -120331,9 +121724,10 @@ static void whereTermPrint(WhereTerm *pTerm, int iTerm){ if( pTerm->wtFlags & TERM_VIRTUAL ) zType[0] = 'V'; if( pTerm->eOperator & WO_EQUIV ) zType[1] = 'E'; if( ExprHasProperty(pTerm->pExpr, EP_FromJoin) ) zType[2] = 'L'; - sqlite3DebugPrintf("TERM-%-3d %p %s cursor=%-3d prob=%-3d op=0x%03x\n", - iTerm, pTerm, zType, pTerm->leftCursor, pTerm->truthProb, - pTerm->eOperator); + sqlite3DebugPrintf( + "TERM-%-3d %p %s cursor=%-3d prob=%-3d op=0x%03x wtFlags=0x%04x\n", + iTerm, pTerm, zType, pTerm->leftCursor, pTerm->truthProb, + pTerm->eOperator, pTerm->wtFlags); sqlite3TreeViewExpr(0, pTerm->pExpr, 0); } } @@ -120482,7 +121876,7 @@ static void whereInfoFree(sqlite3 *db, WhereInfo *pWInfo){ sqlite3DbFree(db, pLevel->u.in.aInLoop); } } - whereClauseClear(&pWInfo->sWC); + sqlite3WhereClauseClear(&pWInfo->sWC); while( pWInfo->pLoops ){ WhereLoop *p = pWInfo->pLoops; pWInfo->pLoops = p->pNextLoop; @@ -120823,8 +122217,9 @@ static void whereLoopOutputAdjust( /* In the absence of explicit truth probabilities, use heuristics to ** guess a reasonable truth probability. */ pLoop->nOut--; - if( pTerm->eOperator&WO_EQ ){ + if( pTerm->eOperator&(WO_EQ|WO_IS) ){ Expr *pRight = pTerm->pExpr->pRight; + testcase( pTerm->pExpr->op==TK_IS ); if( sqlite3ExprIsInteger(pRight, &k) && k>=(-1) && k<=1 ){ k = 10; }else{ @@ -120892,10 +122287,10 @@ static int whereLoopAddBtreeIndex( assert( (pNew->wsFlags & WHERE_TOP_LIMIT)==0 ); if( pNew->wsFlags & WHERE_BTM_LIMIT ){ opMask = WO_LT|WO_LE; - }else if( pProbe->tnum<=0 || (pSrc->jointype & JT_LEFT)!=0 ){ + }else if( /*pProbe->tnum<=0 ||*/ (pSrc->jointype & JT_LEFT)!=0 ){ opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE; }else{ - opMask = WO_EQ|WO_IN|WO_ISNULL|WO_GT|WO_GE|WO_LT|WO_LE; + opMask = WO_EQ|WO_IN|WO_GT|WO_GE|WO_LT|WO_LE|WO_ISNULL|WO_IS; } if( pProbe->bUnordered ) opMask &= ~(WO_GT|WO_GE|WO_LT|WO_LE); @@ -120958,7 +122353,7 @@ static int whereLoopAddBtreeIndex( assert( nIn>0 ); /* RHS always has 2 or more terms... The parser ** changes "x IN (?)" into "x=?". */ - }else if( eOp & (WO_EQ) ){ + }else if( eOp & (WO_EQ|WO_IS) ){ pNew->wsFlags |= WHERE_COLUMN_EQ; if( iCol<0 || (nInMul==0 && pNew->u.btree.nEq==pProbe->nKeyCol-1) ){ if( iCol>=0 && pProbe->uniqNotNull==0 ){ @@ -121008,7 +122403,7 @@ static int whereLoopAddBtreeIndex( whereRangeScanEst(pParse, pBuilder, pBtm, pTop, pNew); }else{ int nEq = ++pNew->u.btree.nEq; - assert( eOp & (WO_ISNULL|WO_EQ|WO_IN) ); + assert( eOp & (WO_ISNULL|WO_EQ|WO_IN|WO_IS) ); assert( pNew->nOut==saved_nOut ); if( pTerm->truthProb<=0 && iCol>=0 ){ @@ -121025,8 +122420,9 @@ static int whereLoopAddBtreeIndex( && ((eOp & WO_IN)==0 || !ExprHasProperty(pTerm->pExpr, EP_xIsSelect)) ){ Expr *pExpr = pTerm->pExpr; - if( (eOp & (WO_EQ|WO_ISNULL))!=0 ){ + if( (eOp & (WO_EQ|WO_ISNULL|WO_IS))!=0 ){ testcase( eOp & WO_EQ ); + testcase( eOp & WO_IS ); testcase( eOp & WO_ISNULL ); rc = whereEqualScanEst(pParse, pBuilder, pExpr->pRight, &nOut); }else{ @@ -121295,15 +122691,14 @@ static int whereLoopAddBtree( #ifndef SQLITE_OMIT_AUTOMATIC_INDEX /* Automatic indexes */ - if( !pBuilder->pOrSet + if( !pBuilder->pOrSet /* Not part of an OR optimization */ && (pWInfo->wctrlFlags & WHERE_NO_AUTOINDEX)==0 && (pWInfo->pParse->db->flags & SQLITE_AutoIndex)!=0 - && pSrc->pIndex==0 - && !pSrc->viaCoroutine - && !pSrc->notIndexed - && HasRowid(pTab) - && !pSrc->isCorrelated - && !pSrc->isRecursive + && pSrc->pIndex==0 /* Has no INDEXED BY clause */ + && !pSrc->notIndexed /* Has no NOT INDEXED clause */ + && HasRowid(pTab) /* Is not a WITHOUT ROWID table. (FIXME: Why not?) */ + && !pSrc->isCorrelated /* Not a correlated subquery */ + && !pSrc->isRecursive /* Not a recursive common table expression. */ ){ /* Generate auto-index WhereLoops */ WhereTerm *pTerm; @@ -121433,10 +122828,32 @@ static int whereLoopAddBtree( /* ** Add all WhereLoop objects for a table of the join identified by ** pBuilder->pNew->iTab. That table is guaranteed to be a virtual table. +** +** If there are no LEFT or CROSS JOIN joins in the query, both mExtra and +** mUnusable are set to 0. Otherwise, mExtra is a mask of all FROM clause +** entries that occur before the virtual table in the FROM clause and are +** separated from it by at least one LEFT or CROSS JOIN. Similarly, the +** mUnusable mask contains all FROM clause entries that occur after the +** virtual table and are separated from it by at least one LEFT or +** CROSS JOIN. +** +** For example, if the query were: +** +** ... FROM t1, t2 LEFT JOIN t3, t4, vt CROSS JOIN t5, t6; +** +** then mExtra corresponds to (t1, t2) and mUnusable to (t5, t6). +** +** All the tables in mExtra must be scanned before the current virtual +** table. So any terms for which all prerequisites are satisfied by +** mExtra may be specified as "usable" in all calls to xBestIndex. +** Conversely, all tables in mUnusable must be scanned after the current +** virtual table, so any terms for which the prerequisites overlap with +** mUnusable should always be configured as "not-usable" for xBestIndex. */ static int whereLoopAddVirtual( WhereLoopBuilder *pBuilder, /* WHERE clause information */ - Bitmask mExtra + Bitmask mExtra, /* Tables that must be scanned before this one */ + Bitmask mUnusable /* Tables that must be scanned after this one */ ){ WhereInfo *pWInfo; /* WHERE analysis context */ Parse *pParse; /* The parsing context */ @@ -121457,6 +122874,7 @@ static int whereLoopAddVirtual( WhereLoop *pNew; int rc = SQLITE_OK; + assert( (mExtra & mUnusable)==0 ); pWInfo = pBuilder->pWInfo; pParse = pWInfo->pParse; db = pParse->db; @@ -121465,7 +122883,7 @@ static int whereLoopAddVirtual( pSrc = &pWInfo->pTabList->a[pNew->iTab]; pTab = pSrc->pTab; assert( IsVirtual(pTab) ); - pIdxInfo = allocateIndexInfo(pParse, pWC, pSrc, pBuilder->pOrderBy); + pIdxInfo = allocateIndexInfo(pParse, pWC, mUnusable, pSrc,pBuilder->pOrderBy); if( pIdxInfo==0 ) return SQLITE_NOMEM; pNew->prereq = 0; pNew->rSetup = 0; @@ -121495,7 +122913,7 @@ static int whereLoopAddVirtual( if( (pTerm->eOperator & WO_IN)!=0 ){ seenIn = 1; } - if( pTerm->prereqRight!=0 ){ + if( (pTerm->prereqRight & ~mExtra)!=0 ){ seenVar = 1; }else if( (pTerm->eOperator & WO_IN)==0 ){ pIdxCons->usable = 1; @@ -121503,7 +122921,7 @@ static int whereLoopAddVirtual( break; case 1: /* Constants with IN operators */ assert( seenIn ); - pIdxCons->usable = (pTerm->prereqRight==0); + pIdxCons->usable = (pTerm->prereqRight & ~mExtra)==0; break; case 2: /* Variables without IN */ assert( seenVar ); @@ -121602,7 +123020,11 @@ whereLoopAddVtab_exit: ** Add WhereLoop entries to handle OR terms. This works for either ** btrees or virtual tables. */ -static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ +static int whereLoopAddOr( + WhereLoopBuilder *pBuilder, + Bitmask mExtra, + Bitmask mUnusable +){ WhereInfo *pWInfo = pBuilder->pWInfo; WhereClause *pWC; WhereLoop *pNew; @@ -121661,14 +123083,14 @@ static int whereLoopAddOr(WhereLoopBuilder *pBuilder, Bitmask mExtra){ #endif #ifndef SQLITE_OMIT_VIRTUALTABLE if( IsVirtual(pItem->pTab) ){ - rc = whereLoopAddVirtual(&sSubBuild, mExtra); + rc = whereLoopAddVirtual(&sSubBuild, mExtra, mUnusable); }else #endif { rc = whereLoopAddBtree(&sSubBuild, mExtra); } if( rc==SQLITE_OK ){ - rc = whereLoopAddOr(&sSubBuild, mExtra); + rc = whereLoopAddOr(&sSubBuild, mExtra, mUnusable); } assert( rc==SQLITE_OK || sCur.n==0 ); if( sCur.n==0 ){ @@ -121730,33 +123152,43 @@ static int whereLoopAddAll(WhereLoopBuilder *pBuilder){ int iTab; SrcList *pTabList = pWInfo->pTabList; struct SrcList_item *pItem; + struct SrcList_item *pEnd = &pTabList->a[pWInfo->nLevel]; sqlite3 *db = pWInfo->pParse->db; - int nTabList = pWInfo->nLevel; int rc = SQLITE_OK; - u8 priorJoinType = 0; WhereLoop *pNew; + u8 priorJointype = 0; /* Loop over the tables in the join, from left to right */ pNew = pBuilder->pNew; whereLoopInit(pNew); - for(iTab=0, pItem=pTabList->a; iTab<nTabList; iTab++, pItem++){ + for(iTab=0, pItem=pTabList->a; pItem<pEnd; iTab++, pItem++){ + Bitmask mUnusable = 0; pNew->iTab = iTab; - pNew->maskSelf = getMask(&pWInfo->sMaskSet, pItem->iCursor); - if( ((pItem->jointype|priorJoinType) & (JT_LEFT|JT_CROSS))!=0 ){ + pNew->maskSelf = sqlite3WhereGetMask(&pWInfo->sMaskSet, pItem->iCursor); + if( ((pItem->jointype|priorJointype) & (JT_LEFT|JT_CROSS))!=0 ){ + /* This condition is true when pItem is the FROM clause term on the + ** right-hand-side of a LEFT or CROSS JOIN. */ mExtra = mPrior; } - priorJoinType = pItem->jointype; + priorJointype = pItem->jointype; if( IsVirtual(pItem->pTab) ){ - rc = whereLoopAddVirtual(pBuilder, mExtra); + struct SrcList_item *p; + for(p=&pItem[1]; p<pEnd; p++){ + if( mUnusable || (p->jointype & (JT_LEFT|JT_CROSS)) ){ + mUnusable |= sqlite3WhereGetMask(&pWInfo->sMaskSet, p->iCursor); + } + } + rc = whereLoopAddVirtual(pBuilder, mExtra, mUnusable); }else{ rc = whereLoopAddBtree(pBuilder, mExtra); } if( rc==SQLITE_OK ){ - rc = whereLoopAddOr(pBuilder, mExtra); + rc = whereLoopAddOr(pBuilder, mExtra, mUnusable); } mPrior |= pNew->maskSelf; if( rc || db->mallocFailed ) break; } + whereLoopClear(db, pNew); return rc; } @@ -121862,10 +123294,10 @@ static i8 wherePathSatisfiesOrderBy( pOBExpr = sqlite3ExprSkipCollate(pOrderBy->a[i].pExpr); if( pOBExpr->op!=TK_COLUMN ) continue; if( pOBExpr->iTable!=iCur ) continue; - pTerm = findTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, - ~ready, WO_EQ|WO_ISNULL, 0); + pTerm = sqlite3WhereFindTerm(&pWInfo->sWC, iCur, pOBExpr->iColumn, + ~ready, WO_EQ|WO_ISNULL|WO_IS, 0); if( pTerm==0 ) continue; - if( (pTerm->eOperator&WO_EQ)!=0 && pOBExpr->iColumn>=0 ){ + if( (pTerm->eOperator&(WO_EQ|WO_IS))!=0 && pOBExpr->iColumn>=0 ){ const char *z1, *z2; pColl = sqlite3ExprCollSeq(pWInfo->pParse, pOrderBy->a[i].pExpr); if( !pColl ) pColl = db->pDfltColl; @@ -121874,6 +123306,7 @@ static i8 wherePathSatisfiesOrderBy( if( !pColl ) pColl = db->pDfltColl; z2 = pColl->zName; if( sqlite3StrICmp(z1, z2)!=0 ) continue; + testcase( pTerm->pExpr->op==TK_IS ); } obSat |= MASKBIT(i); } @@ -121904,7 +123337,7 @@ static i8 wherePathSatisfiesOrderBy( /* Skip over == and IS NULL terms */ if( j<pLoop->u.btree.nEq && pLoop->nSkip==0 - && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL))!=0 + && ((i = pLoop->aLTerm[j]->eOperator) & (WO_EQ|WO_ISNULL|WO_IS))!=0 ){ if( i & WO_ISNULL ){ testcase( isOrderDistinct ); @@ -121998,7 +123431,7 @@ static i8 wherePathSatisfiesOrderBy( Bitmask mTerm; if( MASKBIT(i) & obSat ) continue; p = pOrderBy->a[i].pExpr; - mTerm = exprTableUsage(&pWInfo->sMaskSet,p); + mTerm = sqlite3WhereExprUsage(&pWInfo->sMaskSet,p); if( mTerm==0 && !sqlite3ExprIsConstant(p) ) continue; if( (mTerm&~orderDistinctMask)==0 ){ obSat |= MASKBIT(i); @@ -122471,14 +123904,15 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pItem = pWInfo->pTabList->a; pTab = pItem->pTab; if( IsVirtual(pTab) ) return 0; - if( pItem->zIndex ) return 0; + if( pItem->zIndexedBy ) return 0; iCur = pItem->iCursor; pWC = &pWInfo->sWC; pLoop = pBuilder->pNew; pLoop->wsFlags = 0; pLoop->nSkip = 0; - pTerm = findTerm(pWC, iCur, -1, 0, WO_EQ, 0); + pTerm = sqlite3WhereFindTerm(pWC, iCur, -1, 0, WO_EQ|WO_IS, 0); if( pTerm ){ + testcase( pTerm->eOperator & WO_IS ); pLoop->wsFlags = WHERE_COLUMN_EQ|WHERE_IPK|WHERE_ONEROW; pLoop->aLTerm[0] = pTerm; pLoop->nLTerm = 1; @@ -122487,14 +123921,17 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ pLoop->rRun = 33; /* 33==sqlite3LogEst(10) */ }else{ for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){ + int opMask; assert( pLoop->aLTermSpace==pLoop->aLTerm ); if( !IsUniqueIndex(pIdx) || pIdx->pPartIdxWhere!=0 || pIdx->nKeyCol>ArraySize(pLoop->aLTermSpace) ) continue; + opMask = pIdx->uniqNotNull ? (WO_EQ|WO_IS) : WO_EQ; for(j=0; j<pIdx->nKeyCol; j++){ - pTerm = findTerm(pWC, iCur, pIdx->aiColumn[j], 0, WO_EQ, pIdx); + pTerm = sqlite3WhereFindTerm(pWC, iCur, pIdx->aiColumn[j], 0, opMask, pIdx); if( pTerm==0 ) break; + testcase( pTerm->eOperator & WO_IS ); pLoop->aLTerm[j] = pTerm; } if( j!=pIdx->nKeyCol ) continue; @@ -122513,7 +123950,7 @@ static int whereShortCut(WhereLoopBuilder *pBuilder){ if( pLoop->wsFlags ){ pLoop->nOut = (LogEst)1; pWInfo->a[0].pWLoop = pLoop; - pLoop->maskSelf = getMask(&pWInfo->sMaskSet, iCur); + pLoop->maskSelf = sqlite3WhereGetMask(&pWInfo->sMaskSet, iCur); pWInfo->a[0].iTabCur = iCur; pWInfo->nRowOut = 1; if( pWInfo->pOrderBy ) pWInfo->nOBSat = pWInfo->pOrderBy->nExpr; @@ -122707,8 +124144,8 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( ** subexpression is separated by an AND operator. */ initMaskSet(pMaskSet); - whereClauseInit(&pWInfo->sWC, pWInfo); - whereSplit(&pWInfo->sWC, pWhere, TK_AND); + sqlite3WhereClauseInit(&pWInfo->sWC, pWInfo); + sqlite3WhereSplit(&pWInfo->sWC, pWhere, TK_AND); /* Special case: a WHERE clause that is constant. Evaluate the ** expression and either jump over all of the code or fall thru. @@ -122753,22 +124190,16 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( { Bitmask toTheLeft = 0; for(ii=0; ii<pTabList->nSrc; ii++){ - Bitmask m = getMask(pMaskSet, pTabList->a[ii].iCursor); + Bitmask m = sqlite3WhereGetMask(pMaskSet, pTabList->a[ii].iCursor); assert( (m-1)==toTheLeft ); toTheLeft |= m; } } #endif - /* Analyze all of the subexpressions. Note that exprAnalyze() might - ** add new virtual terms onto the end of the WHERE clause. We do not - ** want to analyze these virtual terms, so start analyzing at the end - ** and work forward so that the added virtual terms are never processed. - */ - exprAnalyzeAll(pTabList, &pWInfo->sWC); - if( db->mallocFailed ){ - goto whereBeginError; - } + /* Analyze all of the subexpressions. */ + sqlite3WhereExprAnalyze(pTabList, &pWInfo->sWC); + if( db->mallocFailed ) goto whereBeginError; if( wctrlFlags & WHERE_WANT_DISTINCT ){ if( isDistinctRedundant(pParse, pTabList, &pWInfo->sWC, pResultSet) ){ @@ -122784,8 +124215,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* Construct the WhereLoop objects */ WHERETRACE(0xffff,("*** Optimizer Start ***\n")); #if defined(WHERETRACE_ENABLED) - /* Display all terms of the WHERE clause */ - if( sqlite3WhereTrace & 0x100 ){ + if( sqlite3WhereTrace & 0x100 ){ /* Display all terms of the WHERE clause */ int i; for(i=0; i<sWLB.pWC->nTerm; i++){ whereTermPrint(&sWLB.pWC->a[i], i); @@ -122797,13 +124227,12 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( rc = whereLoopAddAll(&sWLB); if( rc ) goto whereBeginError; - /* Display all of the WhereLoop objects if wheretrace is enabled */ -#ifdef WHERETRACE_ENABLED /* !=0 */ - if( sqlite3WhereTrace ){ +#ifdef WHERETRACE_ENABLED + if( sqlite3WhereTrace ){ /* Display all of the WhereLoop objects */ WhereLoop *p; int i; - static char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" - "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; + static const char zLabel[] = "0123456789abcdefghijklmnopqrstuvwyxz" + "ABCDEFGHIJKLMNOPQRSTUVWYXZ"; for(p=pWInfo->pLoops, i=0; p; p=p->pNextLoop, i++){ p->cId = zLabel[i%sizeof(zLabel)]; whereLoopPrint(p, sWLB.pWC); @@ -122824,7 +124253,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( if( pParse->nErr || NEVER(db->mallocFailed) ){ goto whereBeginError; } -#ifdef WHERETRACE_ENABLED /* !=0 */ +#ifdef WHERETRACE_ENABLED if( sqlite3WhereTrace ){ sqlite3DebugPrintf("---- Solution nRow=%d", pWInfo->nRowOut); if( pWInfo->nOBSat>0 ){ @@ -122855,8 +124284,10 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( && pResultSet!=0 && OptimizationEnabled(db, SQLITE_OmitNoopJoin) ){ - Bitmask tabUsed = exprListTableUsage(pMaskSet, pResultSet); - if( sWLB.pOrderBy ) tabUsed |= exprListTableUsage(pMaskSet, sWLB.pOrderBy); + Bitmask tabUsed = sqlite3WhereExprListUsage(pMaskSet, pResultSet); + if( sWLB.pOrderBy ){ + tabUsed |= sqlite3WhereExprListUsage(pMaskSet, sWLB.pOrderBy); + } while( pWInfo->nLevel>=2 ){ WhereTerm *pTerm, *pEnd; pLoop = pWInfo->a[pWInfo->nLevel-1].pWLoop; @@ -122887,7 +124318,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* If the caller is an UPDATE or DELETE statement that is requesting ** to use a one-pass algorithm, determine if this is appropriate. ** The one-pass algorithm only works if the WHERE clause constrains - ** the statement to update a single row. + ** the statement to update or delete a single row. */ assert( (wctrlFlags & WHERE_ONEPASS_DESIRED)==0 || pWInfo->nLevel==1 ); if( (wctrlFlags & WHERE_ONEPASS_DESIRED)!=0 @@ -122901,7 +124332,6 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( /* Open all tables in the pTabList and any indices selected for ** searching those tables. */ - notReady = ~(Bitmask)0; for(ii=0, pLevel=pWInfo->a; ii<nTabList; ii++, pLevel++){ Table *pTab; /* Table to open */ int iDb; /* Index of database containing table/index */ @@ -122942,6 +124372,10 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( SQLITE_INT_TO_PTR(n), P4_INT32); assert( n<=pTab->nCol ); } +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, pTabItem->iCursor, 0, 0, + (const u8*)&pTabItem->colUsed, P4_INT64); +#endif }else{ sqlite3TableLock(pParse, iDb, pTab->tnum, 0, pTab->zName); } @@ -122987,10 +124421,24 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( sqlite3VdbeChangeP5(v, OPFLAG_SEEKEQ); /* Hint to COMDB2 */ } VdbeComment((v, "%s", pIx->zName)); +#ifdef SQLITE_ENABLE_COLUMN_USED_MASK + { + u64 colUsed = 0; + int ii, jj; + for(ii=0; ii<pIx->nColumn; ii++){ + jj = pIx->aiColumn[ii]; + if( jj<0 ) continue; + if( jj>63 ) jj = 63; + if( (pTabItem->colUsed & MASKBIT(jj))==0 ) continue; + colUsed |= ((u64)1)<<(ii<63 ? ii : 63); + } + sqlite3VdbeAddOp4Dup8(v, OP_ColumnsUsed, iIndexCur, 0, 0, + (u8*)&colUsed, P4_INT64); + } +#endif /* SQLITE_ENABLE_COLUMN_USED_MASK */ } } if( iDb>=0 ) sqlite3CodeVerifySchema(pParse, iDb); - notReady &= ~getMask(&pWInfo->sMaskSet, pTabItem->iCursor); } pWInfo->iTop = sqlite3VdbeCurrentAddr(v); if( db->mallocFailed ) goto whereBeginError; @@ -123012,14 +124460,14 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin( if( db->mallocFailed ) goto whereBeginError; } #endif - addrExplain = explainOneScan( + addrExplain = sqlite3WhereExplainOneScan( pParse, pTabList, pLevel, ii, pLevel->iFrom, wctrlFlags ); pLevel->addrBody = sqlite3VdbeCurrentAddr(v); - notReady = codeOneLoopStart(pWInfo, ii, notReady); + notReady = sqlite3WhereCodeOneLoopStart(pWInfo, ii, notReady); pWInfo->iContinue = pLevel->addrCont; if( (wsFlags&WHERE_MULTI_OR)==0 && (wctrlFlags&WHERE_ONETABLE_ONLY)==0 ){ - addScanStatus(v, pTabList, pLevel, addrExplain); + sqlite3WhereAddScanStatus(v, pTabList, pLevel, addrExplain); } } @@ -123133,26 +124581,12 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ pLoop = pLevel->pWLoop; /* For a co-routine, change all OP_Column references to the table of - ** the co-routine into OP_SCopy of result contained in a register. + ** the co-routine into OP_Copy of result contained in a register. ** OP_Rowid becomes OP_Null. */ if( pTabItem->viaCoroutine && !db->mallocFailed ){ - last = sqlite3VdbeCurrentAddr(v); - k = pLevel->addrBody; - pOp = sqlite3VdbeGetOp(v, k); - for(; k<last; k++, pOp++){ - if( pOp->p1!=pLevel->iTabCur ) continue; - if( pOp->opcode==OP_Column ){ - pOp->opcode = OP_Copy; - pOp->p1 = pOp->p2 + pTabItem->regResult; - pOp->p2 = pOp->p3; - pOp->p3 = 0; - }else if( pOp->opcode==OP_Rowid ){ - pOp->opcode = OP_Null; - pOp->p1 = 0; - pOp->p3 = 0; - } - } + translateColumnToCopy(v, pLevel->addrBody, pLevel->iTabCur, + pTabItem->regResult); continue; } @@ -123242,6 +124676,7 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){ ** in the input grammar file. */ /* #include <stdio.h> */ +/* #include "sqliteInt.h" */ /* ** Disable all error recovery processing in the parser push-down @@ -124719,7 +126154,7 @@ static int yy_pop_parser_stack(yyParser *pParser){ /* There is no mechanism by which the parser stack can be popped below ** empty in SQLite. */ - if( NEVER(pParser->yyidx<0) ) return 0; + assert( pParser->yyidx>=0 ); #ifndef NDEBUG if( yyTraceFILE && pParser->yyidx>=0 ){ fprintf(yyTraceFILE,"%sPopping %s\n", @@ -125420,7 +126855,7 @@ static void yy_reduce( case 35: /* table_options ::= WITHOUT nm */ { if( yymsp[0].minor.yy0.n==5 && sqlite3_strnicmp(yymsp[0].minor.yy0.z,"rowid",5)==0 ){ - yygotominor.yy186 = TF_WithoutRowid; + yygotominor.yy186 = TF_WithoutRowid | TF_NoVisibleRowid; }else{ yygotominor.yy186 = 0; sqlite3ErrorMsg(pParse, "unknown table option: %.*s", yymsp[0].minor.yy0.n, yymsp[0].minor.yy0.z); @@ -125644,6 +127079,7 @@ static void yy_reduce( case 114: /* selectnowith ::= selectnowith multiselect_op oneselect */ { Select *pRhs = yymsp[0].minor.yy3; + Select *pLhs = yymsp[-2].minor.yy3; if( pRhs && pRhs->pPrior ){ SrcList *pFrom; Token x; @@ -125654,11 +127090,12 @@ static void yy_reduce( } if( pRhs ){ pRhs->op = (u8)yymsp[-1].minor.yy328; - pRhs->pPrior = yymsp[-2].minor.yy3; + pRhs->pPrior = pLhs; + if( ALWAYS(pLhs) ) pLhs->selFlags &= ~SF_MultiValue; pRhs->selFlags &= ~SF_MultiValue; if( yymsp[-1].minor.yy328!=TK_ALL ) pParse->hasCompound = 1; }else{ - sqlite3SelectDelete(pParse->db, yymsp[-2].minor.yy3); + sqlite3SelectDelete(pParse->db, pLhs); } yygotominor.yy3 = pRhs; } @@ -125719,7 +127156,9 @@ static void yy_reduce( {yygotominor.yy381 = SF_Distinct;} break; case 123: /* distinct ::= ALL */ - case 124: /* distinct ::= */ yytestcase(yyruleno==124); +{yygotominor.yy381 = SF_All;} + break; + case 124: /* distinct ::= */ {yygotominor.yy381 = 0;} break; case 125: /* sclp ::= selcollist COMMA */ @@ -126014,7 +127453,7 @@ static void yy_reduce( } yygotominor.yy346.pExpr = sqlite3ExprFunction(pParse, yymsp[-1].minor.yy14, &yymsp[-4].minor.yy0); spanSet(&yygotominor.yy346,&yymsp[-4].minor.yy0,&yymsp[0].minor.yy0); - if( yymsp[-2].minor.yy381 && yygotominor.yy346.pExpr ){ + if( yymsp[-2].minor.yy381==SF_Distinct && yygotominor.yy346.pExpr ){ yygotominor.yy346.pExpr->flags |= EP_Distinct; } } @@ -126823,6 +128262,7 @@ SQLITE_PRIVATE void sqlite3Parser( ** individual tokens and sends those tokens one-by-one over to the ** parser for analysis. */ +/* #include "sqliteInt.h" */ /* #include <stdlib.h> */ /* @@ -127187,7 +128627,11 @@ SQLITE_PRIVATE const char sqlite3IsEbcdicIdChar[] = { }; #define IdChar(C) (((c=C)>=0x42 && sqlite3IsEbcdicIdChar[c-0x40])) #endif + +/* Make the IdChar function accessible from ctime.c */ +#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS SQLITE_PRIVATE int sqlite3IsIdChar(u8 c){ return IdChar(c); } +#endif /* @@ -127535,7 +128979,8 @@ SQLITE_PRIVATE int sqlite3RunParser(Parse *pParse, const char *zSql, char **pzEr } abort_parse: assert( nErr==0 ); - if( zSql[i]==0 && pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ + if( pParse->rc==SQLITE_OK && db->mallocFailed==0 ){ + assert( zSql[i]==0 ); if( lastTokenParsed!=TK_SEMI ){ sqlite3Parser(pEngine, TK_SEMI, pParse->sLastToken, pParse); pParse->zTail = &zSql[i]; @@ -127557,7 +129002,7 @@ abort_parse: pParse->rc = SQLITE_NOMEM; } if( pParse->rc!=SQLITE_OK && pParse->rc!=SQLITE_DONE && pParse->zErrMsg==0 ){ - sqlite3SetString(&pParse->zErrMsg, db, "%s", sqlite3ErrStr(pParse->rc)); + pParse->zErrMsg = sqlite3MPrintf(db, "%s", sqlite3ErrStr(pParse->rc)); } assert( pzErrMsg!=0 ); if( pParse->zErrMsg ){ @@ -127627,6 +129072,7 @@ abort_parse: ** separating it out, the code will be automatically omitted from ** static links that do not use it. */ +/* #include "sqliteInt.h" */ #ifndef SQLITE_OMIT_COMPLETE /* @@ -127894,7 +129340,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *zSql){ rc = SQLITE_NOMEM; } sqlite3ValueFree(pVal); - return sqlite3ApiExit(0, rc); + return rc & 0xff; } #endif /* SQLITE_OMIT_UTF16 */ #endif /* SQLITE_OMIT_COMPLETE */ @@ -127917,6 +129363,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *zSql){ ** other files are for internal use by SQLite and should not be ** accessed by users of the library. */ +/* #include "sqliteInt.h" */ #ifdef SQLITE_ENABLE_FTS3 /************** Include fts3.h in the middle of main.c ***********************/ @@ -127936,6 +129383,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_complete16(const void *zSql){ ** This header file is used by programs that want to link against the ** FTS3 library. All it does is declare the sqlite3Fts3Init() interface. */ +/* #include "sqlite3.h" */ #if 0 extern "C" { @@ -127968,6 +129416,7 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db); ** This header file is used by programs that want to link against the ** RTREE library. All it does is declare the sqlite3RtreeInit() interface. */ +/* #include "sqlite3.h" */ #if 0 extern "C" { @@ -128000,6 +129449,7 @@ SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db); ** This header file is used by programs that want to link against the ** ICU extension. All it does is declare the sqlite3IcuInit() interface. */ +/* #include "sqlite3.h" */ #if 0 extern "C" { @@ -128632,6 +130082,7 @@ SQLITE_API int SQLITE_CDECL sqlite3_config(int op, ...){ ** the lookaside memory. */ static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ +#ifndef SQLITE_OMIT_LOOKASIDE void *pStart; if( db->lookaside.nOut ){ return SQLITE_BUSY; @@ -128682,6 +130133,7 @@ static int setupLookaside(sqlite3 *db, void *pBuf, int sz, int cnt){ db->lookaside.bEnabled = 0; db->lookaside.bMalloced = 0; } +#endif /* SQLITE_OMIT_LOOKASIDE */ return SQLITE_OK; } @@ -130072,9 +131524,11 @@ SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3 *db){ return ( db->temp_store!=1 ); #endif #if SQLITE_TEMP_STORE==3 + UNUSED_PARAMETER(db); return 1; #endif #if SQLITE_TEMP_STORE<1 || SQLITE_TEMP_STORE>3 + UNUSED_PARAMETER(db); return 0; #endif } @@ -130748,6 +132202,9 @@ static int openDatabase( #endif #if defined(SQLITE_REVERSE_UNORDERED_SELECTS) | SQLITE_ReverseOrder +#endif +#if defined(SQLITE_ENABLE_OVERSIZE_CELL_CHECK) + | SQLITE_CellSizeCk #endif ; sqlite3HashInit(&db->aCollSeq); @@ -130868,8 +132325,7 @@ static int openDatabase( #ifdef SQLITE_ENABLE_DBSTAT_VTAB if( !db->mallocFailed && rc==SQLITE_OK){ - int sqlite3_dbstat_register(sqlite3*); - rc = sqlite3_dbstat_register(db); + rc = sqlite3DbstatRegister(db); } #endif @@ -130914,7 +132370,7 @@ opendb_out: sqlite3GlobalConfig.xSqllog(pArg, db, zFilename, 0); } #endif - return sqlite3ApiExit(0, rc); + return rc & 0xff; } /* @@ -130972,7 +132428,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_open16( } sqlite3ValueFree(pVal); - return sqlite3ApiExit(0, rc); + return rc & 0xff; } #endif /* SQLITE_OMIT_UTF16 */ @@ -131344,7 +132800,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_file_control(sqlite3 *db, const char *zDbN */ SQLITE_API int SQLITE_CDECL sqlite3_test_control(int op, ...){ int rc = 0; -#ifndef SQLITE_OMIT_BUILTIN_TEST +#ifdef SQLITE_OMIT_BUILTIN_TEST + UNUSED_PARAMETER(op); +#else va_list ap; va_start(ap, op); switch( op ){ @@ -131786,6 +133244,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_db_readonly(sqlite3 *db, const char *zDbNa ** This file contains the implementation of the sqlite3_unlock_notify() ** API method and its associated functionality. */ +/* #include "sqliteInt.h" */ +/* #include "btreeInt.h" */ /* Omit this entire file if SQLITE_ENABLE_UNLOCK_NOTIFY is not defined. */ #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY @@ -132429,9 +133889,11 @@ SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){ /* If not building as part of the core, include sqlite3ext.h. */ #ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ SQLITE_EXTENSION_INIT3 #endif +/* #include "sqlite3.h" */ /************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/ /************** Begin file fts3_tokenizer.h **********************************/ /* @@ -132460,6 +133922,7 @@ SQLITE_EXTENSION_INIT3 ** If tokenizers are to be allowed to call sqlite3_*() functions, then ** we will need a way to register the API consistently. */ +/* #include "sqlite3.h" */ /* ** Structures used by the tokenizer interface. When a new tokenizer @@ -132873,6 +134336,8 @@ typedef struct Fts3DeferredToken Fts3DeferredToken; typedef struct Fts3SegReader Fts3SegReader; typedef struct Fts3MultiSegReader Fts3MultiSegReader; +typedef struct MatchinfoBuffer MatchinfoBuffer; + /* ** A connection to a fulltext index is an instance of the following ** structure. The xCreate and xConnect methods create an instance @@ -132982,9 +134447,7 @@ struct Fts3Cursor { i64 iMinDocid; /* Minimum docid to return */ i64 iMaxDocid; /* Maximum docid to return */ int isMatchinfoNeeded; /* True when aMatchinfo[] needs filling in */ - u32 *aMatchinfo; /* Information about most recent match */ - int nMatchinfo; /* Number of elements in aMatchinfo[] */ - char *zMatchinfo; /* Matchinfo specification */ + MatchinfoBuffer *pMIBuffer; /* Buffer for matchinfo data */ }; #define FTS3_EVAL_FILTER 0 @@ -133104,7 +134567,9 @@ struct Fts3Expr { u8 bStart; /* True if iDocid is valid */ u8 bDeferred; /* True if this expression is entirely deferred */ - u32 *aMI; + /* The following are used by the fts3_snippet.c module. */ + int iPhrase; /* Index of this phrase in matchinfo() results */ + u32 *aMI; /* See above */ }; /* @@ -133225,6 +134690,7 @@ SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,i SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *); SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *); SQLITE_PRIVATE void sqlite3Fts3CreateStatTable(int*, Fts3Table*); +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc); /* fts3_tokenizer.c */ SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *); @@ -133240,6 +134706,7 @@ SQLITE_PRIVATE void sqlite3Fts3Snippet(sqlite3_context *, Fts3Cursor *, const ch const char *, const char *, int, int ); SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const char *); +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p); /* fts3_expr.c */ SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, int, @@ -133296,7 +134763,9 @@ SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int); /* #include <string.h> */ /* #include <stdarg.h> */ +/* #include "fts3.h" */ #ifndef SQLITE_CORE +/* # include "sqlite3ext.h" */ SQLITE_EXTENSION_INIT1 #endif @@ -134667,7 +136136,7 @@ static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){ sqlite3Fts3ExprFree(pCsr->pExpr); sqlite3Fts3FreeDeferredTokens(pCsr); sqlite3_free(pCsr->aDoclist); - sqlite3_free(pCsr->aMatchinfo); + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 ); sqlite3_free(pCsr); return SQLITE_OK; @@ -136168,7 +137637,7 @@ static int fts3FilterMethod( /* In case the cursor has been used before, clear it now. */ sqlite3_finalize(pCsr->pStmt); sqlite3_free(pCsr->aDoclist); - sqlite3_free(pCsr->aMatchinfo); + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); sqlite3Fts3ExprFree(pCsr->pExpr); memset(&pCursor[1], 0, sizeof(Fts3Cursor)-sizeof(sqlite3_vtab_cursor)); @@ -137223,7 +138692,6 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){ int bIncrOk = (bOptOk && pCsr->bDesc==pTab->bDescIdx && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 - && p->nToken<=MAX_INCR_PHRASE_TOKENS && p->nToken>0 #ifdef SQLITE_TEST && pTab->bNoIncrDoclist==0 #endif @@ -137343,6 +138811,7 @@ SQLITE_PRIVATE void sqlite3Fts3DoclistNext( p += sqlite3Fts3GetVarint(p, piDocid); }else{ fts3PoslistCopy(0, &p); + while( p<&aDoclist[nDoclist] && *p==0 ) p++; if( p>=&aDoclist[nDoclist] ){ *pbEof = 1; }else{ @@ -138066,7 +139535,7 @@ static int fts3EvalNearTrim( ** 2. NEAR is treated as AND. If the expression is "x NEAR y", it is ** advanced to point to the next row that matches "x AND y". ** -** See fts3EvalTestDeferredAndNear() for details on testing if a row is +** See sqlite3Fts3EvalTestDeferred() for details on testing if a row is ** really a match, taking into account deferred tokens and NEAR operators. */ static void fts3EvalNextRow( @@ -138286,7 +139755,7 @@ static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){ } /* -** This function is a helper function for fts3EvalTestDeferredAndNear(). +** This function is a helper function for sqlite3Fts3EvalTestDeferred(). ** Assuming no error occurs or has occurred, It returns non-zero if the ** expression passed as the second argument matches the row that pCsr ** currently points to, or zero if it does not. @@ -138407,7 +139876,7 @@ static int fts3EvalTestExpr( ** Or, if no error occurs and it seems the current row does match the FTS ** query, return 0. */ -static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){ +SQLITE_PRIVATE int sqlite3Fts3EvalTestDeferred(Fts3Cursor *pCsr, int *pRc){ int rc = *pRc; int bMiss = 0; if( rc==SQLITE_OK ){ @@ -138454,7 +139923,7 @@ static int fts3EvalNext(Fts3Cursor *pCsr){ pCsr->isRequireSeek = 1; pCsr->isMatchinfoNeeded = 1; pCsr->iPrevId = pExpr->iDocid; - }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) ); + }while( pCsr->isEof==0 && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); } /* Check if the cursor is past the end of the docid range specified @@ -138615,7 +140084,7 @@ static int fts3EvalGatherStats( pCsr->iPrevId = pRoot->iDocid; }while( pCsr->isEof==0 && pRoot->eType==FTSQUERY_NEAR - && fts3EvalTestDeferredAndNear(pCsr, &rc) + && sqlite3Fts3EvalTestDeferred(pCsr, &rc) ); if( rc==SQLITE_OK && pCsr->isEof==0 ){ @@ -138640,7 +140109,6 @@ static int fts3EvalGatherStats( fts3EvalNextRow(pCsr, pRoot, &rc); assert( pRoot->bEof==0 ); }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK ); - fts3EvalTestDeferredAndNear(pCsr, &rc); } } return rc; @@ -138750,10 +140218,10 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( int rc = SQLITE_OK; int bDescDoclist = pTab->bDescIdx; /* For DOCID_CMP macro */ int bOr = 0; - u8 bEof = 0; u8 bTreeEof = 0; Fts3Expr *p; /* Used to iterate from pExpr to root */ Fts3Expr *pNear; /* Most senior NEAR ancestor (or pExpr) */ + int bMatch; /* Check if this phrase descends from an OR expression node. If not, ** return NULL. Otherwise, the entry that corresponds to docid @@ -138787,31 +140255,47 @@ SQLITE_PRIVATE int sqlite3Fts3EvalPhrasePoslist( } if( rc!=SQLITE_OK ) return rc; - pIter = pPhrase->pOrPoslist; - iDocid = pPhrase->iOrDocid; - if( pCsr->bDesc==bDescDoclist ){ - bEof = !pPhrase->doclist.nAll || - (pIter >= (pPhrase->doclist.aAll + pPhrase->doclist.nAll)); - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ - sqlite3Fts3DoclistNext( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &bEof - ); - } - }else{ - bEof = !pPhrase->doclist.nAll || (pIter && pIter<=pPhrase->doclist.aAll); - while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ - int dummy; - sqlite3Fts3DoclistPrev( - bDescDoclist, pPhrase->doclist.aAll, pPhrase->doclist.nAll, - &pIter, &iDocid, &dummy, &bEof - ); + bMatch = 1; + for(p=pNear; p; p=p->pLeft){ + u8 bEof = 0; + Fts3Expr *pTest = p; + Fts3Phrase *pPh; + assert( pTest->eType==FTSQUERY_NEAR || pTest->eType==FTSQUERY_PHRASE ); + if( pTest->eType==FTSQUERY_NEAR ) pTest = pTest->pRight; + assert( pTest->eType==FTSQUERY_PHRASE ); + pPh = pTest->pPhrase; + + pIter = pPh->pOrPoslist; + iDocid = pPh->iOrDocid; + if( pCsr->bDesc==bDescDoclist ){ + bEof = !pPh->doclist.nAll || + (pIter >= (pPh->doclist.aAll + pPh->doclist.nAll)); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)<0 ) && bEof==0 ){ + sqlite3Fts3DoclistNext( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &bEof + ); + } + }else{ + bEof = !pPh->doclist.nAll || (pIter && pIter<=pPh->doclist.aAll); + while( (pIter==0 || DOCID_CMP(iDocid, pCsr->iPrevId)>0 ) && bEof==0 ){ + int dummy; + sqlite3Fts3DoclistPrev( + bDescDoclist, pPh->doclist.aAll, pPh->doclist.nAll, + &pIter, &iDocid, &dummy, &bEof + ); + } } + pPh->pOrPoslist = pIter; + pPh->iOrDocid = iDocid; + if( bEof || iDocid!=pCsr->iPrevId ) bMatch = 0; } - pPhrase->pOrPoslist = pIter; - pPhrase->iOrDocid = iDocid; - if( bEof || iDocid!=pCsr->iPrevId ) pIter = 0; + if( bMatch ){ + pIter = pPhrase->pOrPoslist; + }else{ + pIter = 0; + } } if( pIter==0 ) return SQLITE_OK; @@ -138899,6 +140383,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_fts3_init( ****************************************************************************** ** */ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* #include <string.h> */ @@ -139455,6 +140940,7 @@ SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db){ ** syntax is relatively simple, the whole tokenizer/parser system is ** hand-coded. */ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* @@ -140410,321 +141896,1372 @@ static int fts3ExprParseUnbalanced( } /* -** Parameters z and n contain a pointer to and length of a buffer containing -** an fts3 query expression, respectively. This function attempts to parse the -** query expression and create a tree of Fts3Expr structures representing the -** parsed expression. If successful, *ppExpr is set to point to the head -** of the parsed expression tree and SQLITE_OK is returned. If an error -** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse -** error) is returned and *ppExpr is set to 0. +** Parameters z and n contain a pointer to and length of a buffer containing +** an fts3 query expression, respectively. This function attempts to parse the +** query expression and create a tree of Fts3Expr structures representing the +** parsed expression. If successful, *ppExpr is set to point to the head +** of the parsed expression tree and SQLITE_OK is returned. If an error +** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse +** error) is returned and *ppExpr is set to 0. +** +** If parameter n is a negative number, then z is assumed to point to a +** nul-terminated string and the length is determined using strlen(). +** +** The first parameter, pTokenizer, is passed the fts3 tokenizer module to +** use to normalize query tokens while parsing the expression. The azCol[] +** array, which is assumed to contain nCol entries, should contain the names +** of each column in the target fts3 table, in order from left to right. +** Column names must be nul-terminated strings. +** +** The iDefaultCol parameter should be passed the index of the table column +** that appears on the left-hand-side of the MATCH operator (the default +** column to match against for tokens for which a column name is not explicitly +** specified as part of the query string), or -1 if tokens may by default +** match any table column. +*/ +SQLITE_PRIVATE int sqlite3Fts3ExprParse( + sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ + int iLangid, /* Language id for tokenizer */ + char **azCol, /* Array of column names for fts3 table */ + int bFts4, /* True to allow FTS4-only syntax */ + int nCol, /* Number of entries in azCol[] */ + int iDefaultCol, /* Default column to query */ + const char *z, int n, /* Text of MATCH query */ + Fts3Expr **ppExpr, /* OUT: Parsed query structure */ + char **pzErr /* OUT: Error message (sqlite3_malloc) */ +){ + int rc = fts3ExprParseUnbalanced( + pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr + ); + + /* Rebalance the expression. And check that its depth does not exceed + ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ + if( rc==SQLITE_OK && *ppExpr ){ + rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + if( rc==SQLITE_OK ){ + rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3Fts3ExprFree(*ppExpr); + *ppExpr = 0; + if( rc==SQLITE_TOOBIG ){ + sqlite3Fts3ErrMsg(pzErr, + "FTS expression tree is too large (maximum depth %d)", + SQLITE_FTS3_MAX_EXPR_DEPTH + ); + rc = SQLITE_ERROR; + }else if( rc==SQLITE_ERROR ){ + sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); + } + } + + return rc; +} + +/* +** Free a single node of an expression tree. +*/ +static void fts3FreeExprNode(Fts3Expr *p){ + assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); + sqlite3Fts3EvalPhraseCleanup(p->pPhrase); + sqlite3_free(p->aMI); + sqlite3_free(p); +} + +/* +** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** +** This function would be simpler if it recursively called itself. But +** that would mean passing a sufficiently large expression to ExprParse() +** could cause a stack overflow. +*/ +SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ + Fts3Expr *p; + assert( pDel==0 || pDel->pParent==0 ); + for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ + assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); + } + while( p ){ + Fts3Expr *pParent = p->pParent; + fts3FreeExprNode(p); + if( pParent && p==pParent->pLeft && pParent->pRight ){ + p = pParent->pRight; + while( p && (p->pLeft || p->pRight) ){ + assert( p==p->pParent->pRight || p==p->pParent->pLeft ); + p = (p->pLeft ? p->pLeft : p->pRight); + } + }else{ + p = pParent; + } + } +} + +/**************************************************************************** +***************************************************************************** +** Everything after this point is just test code. +*/ + +#ifdef SQLITE_TEST + +/* #include <stdio.h> */ + +/* +** Function to query the hash-table of tokenizers (see README.tokenizers). +*/ +static int queryTestTokenizer( + sqlite3 *db, + const char *zName, + const sqlite3_tokenizer_module **pp +){ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?)"; + + *pp = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ + memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); + } + } + + return sqlite3_finalize(pStmt); +} + +/* +** Return a pointer to a buffer containing a text representation of the +** expression passed as the first argument. The buffer is obtained from +** sqlite3_malloc(). It is the responsibility of the caller to use +** sqlite3_free() to release the memory. If an OOM condition is encountered, +** NULL is returned. +** +** If the second argument is not NULL, then its contents are prepended to +** the returned expression text and then freed using sqlite3_free(). +*/ +static char *exprToString(Fts3Expr *pExpr, char *zBuf){ + if( pExpr==0 ){ + return sqlite3_mprintf(""); + } + switch( pExpr->eType ){ + case FTSQUERY_PHRASE: { + Fts3Phrase *pPhrase = pExpr->pPhrase; + int i; + zBuf = sqlite3_mprintf( + "%zPHRASE %d 0", zBuf, pPhrase->iColumn); + for(i=0; zBuf && i<pPhrase->nToken; i++){ + zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, + pPhrase->aToken[i].n, pPhrase->aToken[i].z, + (pPhrase->aToken[i].isPrefix?"+":"") + ); + } + return zBuf; + } + + case FTSQUERY_NEAR: + zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); + break; + case FTSQUERY_NOT: + zBuf = sqlite3_mprintf("%zNOT ", zBuf); + break; + case FTSQUERY_AND: + zBuf = sqlite3_mprintf("%zAND ", zBuf); + break; + case FTSQUERY_OR: + zBuf = sqlite3_mprintf("%zOR ", zBuf); + break; + } + + if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); + if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); + + if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); + if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); + + return zBuf; +} + +/* +** This is the implementation of a scalar SQL function used to test the +** expression parser. It should be called as follows: +** +** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); +** +** The first argument, <tokenizer>, is the name of the fts3 tokenizer used +** to parse the query expression (see README.tokenizers). The second argument +** is the query expression to parse. Each subsequent argument is the name +** of a column of the fts3 table that the query expression may refer to. +** For example: +** +** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); +*/ +static void fts3ExprTest( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + sqlite3_tokenizer_module const *pModule = 0; + sqlite3_tokenizer *pTokenizer = 0; + int rc; + char **azCol = 0; + const char *zExpr; + int nExpr; + int nCol; + int ii; + Fts3Expr *pExpr; + char *zBuf = 0; + sqlite3 *db = sqlite3_context_db_handle(context); + + if( argc<3 ){ + sqlite3_result_error(context, + "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 + ); + return; + } + + rc = queryTestTokenizer(db, + (const char *)sqlite3_value_text(argv[0]), &pModule); + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + }else if( !pModule ){ + sqlite3_result_error(context, "No such tokenizer module", -1); + goto exprtest_out; + } + + rc = pModule->xCreate(0, 0, &pTokenizer); + assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); + if( rc==SQLITE_NOMEM ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + } + pTokenizer->pModule = pModule; + + zExpr = (const char *)sqlite3_value_text(argv[1]); + nExpr = sqlite3_value_bytes(argv[1]); + nCol = argc-2; + azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); + if( !azCol ){ + sqlite3_result_error_nomem(context); + goto exprtest_out; + } + for(ii=0; ii<nCol; ii++){ + azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); + } + + if( sqlite3_user_data(context) ){ + char *zDummy = 0; + rc = sqlite3Fts3ExprParse( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy + ); + assert( rc==SQLITE_OK || pExpr==0 ); + sqlite3_free(zDummy); + }else{ + rc = fts3ExprParseUnbalanced( + pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr + ); + } + + if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ + sqlite3Fts3ExprFree(pExpr); + sqlite3_result_error(context, "Error parsing expression", -1); + }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ + sqlite3_result_error_nomem(context); + }else{ + sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); + sqlite3_free(zBuf); + } + + sqlite3Fts3ExprFree(pExpr); + +exprtest_out: + if( pModule && pTokenizer ){ + rc = pModule->xDestroy(pTokenizer); + } + sqlite3_free(azCol); +} + +/* +** Register the query expression parser test function fts3_exprtest() +** with database connection db. +*/ +SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ + int rc = sqlite3_create_function( + db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 + ); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", + -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 + ); + } + return rc; +} + +#endif +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_expr.c *******************************************/ +/************** Begin file fts3_hash.c ***************************************/ +/* +** 2001 September 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the implementation of generic hash-tables used in SQLite. +** We've modified it slightly to serve as a standalone hash table +** implementation for the full-text indexing module. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include <assert.h> */ +/* #include <stdlib.h> */ +/* #include <string.h> */ + +/* #include "fts3_hash.h" */ + +/* +** Malloc and Free functions +*/ +static void *fts3HashMalloc(int n){ + void *p = sqlite3_malloc(n); + if( p ){ + memset(p, 0, n); + } + return p; +} +static void fts3HashFree(void *p){ + sqlite3_free(p); +} + +/* Turn bulk memory into a hash table object by initializing the +** fields of the Hash structure. +** +** "pNew" is a pointer to the hash table that is to be initialized. +** keyClass is one of the constants +** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass +** determines what kind of key the hash table will use. "copyKey" is +** true if the hash table should make its own private copy of keys and +** false if it should just use the supplied pointer. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){ + assert( pNew!=0 ); + assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); + pNew->keyClass = keyClass; + pNew->copyKey = copyKey; + pNew->first = 0; + pNew->count = 0; + pNew->htsize = 0; + pNew->ht = 0; +} + +/* Remove all entries from a hash table. Reclaim all memory. +** Call this routine to delete a hash table or to reset a hash table +** to the empty state. +*/ +SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ + Fts3HashElem *elem; /* For looping over all elements of the table */ + + assert( pH!=0 ); + elem = pH->first; + pH->first = 0; + fts3HashFree(pH->ht); + pH->ht = 0; + pH->htsize = 0; + while( elem ){ + Fts3HashElem *next_elem = elem->next; + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); + } + fts3HashFree(elem); + elem = next_elem; + } + pH->count = 0; +} + +/* +** Hash and comparison functions when the mode is FTS3_HASH_STRING +*/ +static int fts3StrHash(const void *pKey, int nKey){ + const char *z = (const char *)pKey; + unsigned h = 0; + if( nKey<=0 ) nKey = (int) strlen(z); + while( nKey > 0 ){ + h = (h<<3) ^ h ^ *z++; + nKey--; + } + return (int)(h & 0x7fffffff); +} +static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return strncmp((const char*)pKey1,(const char*)pKey2,n1); +} + +/* +** Hash and comparison functions when the mode is FTS3_HASH_BINARY +*/ +static int fts3BinHash(const void *pKey, int nKey){ + int h = 0; + const char *z = (const char *)pKey; + while( nKey-- > 0 ){ + h = (h<<3) ^ h ^ *(z++); + } + return h & 0x7fffffff; +} +static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){ + if( n1!=n2 ) return 1; + return memcmp(pKey1,pKey2,n1); +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** The C syntax in this function definition may be unfamilar to some +** programmers, so we provide the following additional explanation: +** +** The name of the function is "ftsHashFunction". The function takes a +** single parameter "keyClass". The return value of ftsHashFunction() +** is a pointer to another function. Specifically, the return value +** of ftsHashFunction() is a pointer to a function that takes two parameters +** with types "const void*" and "int" and returns an "int". +*/ +static int (*ftsHashFunction(int keyClass))(const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrHash; + }else{ + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinHash; + } +} + +/* +** Return a pointer to the appropriate hash function given the key class. +** +** For help in interpreted the obscure C code in the function definition, +** see the header comment on the previous function. +*/ +static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ + if( keyClass==FTS3_HASH_STRING ){ + return &fts3StrCompare; + }else{ + assert( keyClass==FTS3_HASH_BINARY ); + return &fts3BinCompare; + } +} + +/* Link an element into the hash table +*/ +static void fts3HashInsertElement( + Fts3Hash *pH, /* The complete hash table */ + struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ + Fts3HashElem *pNew /* The element to be inserted */ +){ + Fts3HashElem *pHead; /* First element already in pEntry */ + pHead = pEntry->chain; + if( pHead ){ + pNew->next = pHead; + pNew->prev = pHead->prev; + if( pHead->prev ){ pHead->prev->next = pNew; } + else { pH->first = pNew; } + pHead->prev = pNew; + }else{ + pNew->next = pH->first; + if( pH->first ){ pH->first->prev = pNew; } + pNew->prev = 0; + pH->first = pNew; + } + pEntry->count++; + pEntry->chain = pNew; +} + + +/* Resize the hash table so that it cantains "new_size" buckets. +** "new_size" must be a power of 2. The hash table might fail +** to resize if sqliteMalloc() fails. +** +** Return non-zero if a memory allocation error occurs. +*/ +static int fts3Rehash(Fts3Hash *pH, int new_size){ + struct _fts3ht *new_ht; /* The new hash table */ + Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( (new_size & (new_size-1))==0 ); + new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); + if( new_ht==0 ) return 1; + fts3HashFree(pH->ht); + pH->ht = new_ht; + pH->htsize = new_size; + xHash = ftsHashFunction(pH->keyClass); + for(elem=pH->first, pH->first=0; elem; elem = next_elem){ + int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); + next_elem = elem->next; + fts3HashInsertElement(pH, &new_ht[h], elem); + } + return 0; +} + +/* This function (for internal use only) locates an element in an +** hash table that matches the given key. The hash for this key has +** already been computed and is passed as the 4th parameter. +*/ +static Fts3HashElem *fts3FindElementByHash( + const Fts3Hash *pH, /* The pH to be searched */ + const void *pKey, /* The key we are searching for */ + int nKey, + int h /* The hash for this key. */ +){ + Fts3HashElem *elem; /* Used to loop thru the element list */ + int count; /* Number of elements left to test */ + int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + + if( pH->ht ){ + struct _fts3ht *pEntry = &pH->ht[h]; + elem = pEntry->chain; + count = pEntry->count; + xCompare = ftsCompareFunction(pH->keyClass); + while( count-- && elem ){ + if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ + return elem; + } + elem = elem->next; + } + } + return 0; +} + +/* Remove a single entry from the hash table given a pointer to that +** element and a hash on the element's key. +*/ +static void fts3RemoveElementByHash( + Fts3Hash *pH, /* The pH containing "elem" */ + Fts3HashElem* elem, /* The element to be removed from the pH */ + int h /* Hash value for the element */ +){ + struct _fts3ht *pEntry; + if( elem->prev ){ + elem->prev->next = elem->next; + }else{ + pH->first = elem->next; + } + if( elem->next ){ + elem->next->prev = elem->prev; + } + pEntry = &pH->ht[h]; + if( pEntry->chain==elem ){ + pEntry->chain = elem->next; + } + pEntry->count--; + if( pEntry->count<=0 ){ + pEntry->chain = 0; + } + if( pH->copyKey && elem->pKey ){ + fts3HashFree(elem->pKey); + } + fts3HashFree( elem ); + pH->count--; + if( pH->count<=0 ){ + assert( pH->first==0 ); + assert( pH->count==0 ); + fts3HashClear(pH); + } +} + +SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( + const Fts3Hash *pH, + const void *pKey, + int nKey +){ + int h; /* A hash on key */ + int (*xHash)(const void*,int); /* The hash function */ + + if( pH==0 || pH->ht==0 ) return 0; + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + h = (*xHash)(pKey,nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); +} + +/* +** Attempt to locate an element of the hash table pH with a key +** that matches pKey,nKey. Return the data for this element if it is +** found, or NULL if there is no match. +*/ +SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){ + Fts3HashElem *pElem; /* The element that matches key (if any) */ + + pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); + return pElem ? pElem->data : 0; +} + +/* Insert an element into the hash table pH. The key is pKey,nKey +** and the data is "data". +** +** If no element exists with a matching key, then a new +** element is created. A copy of the key is made if the copyKey +** flag is set. NULL is returned. +** +** If another element already exists with the same key, then the +** new data replaces the old data and the old data is returned. +** The key is not copied in this instance. If a malloc fails, then +** the new data is returned and the hash table is unchanged. +** +** If the "data" parameter to this function is NULL, then the +** element corresponding to "key" is removed from the hash table. +*/ +SQLITE_PRIVATE void *sqlite3Fts3HashInsert( + Fts3Hash *pH, /* The hash table to insert into */ + const void *pKey, /* The key */ + int nKey, /* Number of bytes in the key */ + void *data /* The data */ +){ + int hraw; /* Raw hash value of the key */ + int h; /* the hash of the key modulo hash table size */ + Fts3HashElem *elem; /* Used to loop thru the element list */ + Fts3HashElem *new_elem; /* New element added to the pH */ + int (*xHash)(const void*,int); /* The hash function */ + + assert( pH!=0 ); + xHash = ftsHashFunction(pH->keyClass); + assert( xHash!=0 ); + hraw = (*xHash)(pKey, nKey); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + elem = fts3FindElementByHash(pH,pKey,nKey,h); + if( elem ){ + void *old_data = elem->data; + if( data==0 ){ + fts3RemoveElementByHash(pH,elem,h); + }else{ + elem->data = data; + } + return old_data; + } + if( data==0 ) return 0; + if( (pH->htsize==0 && fts3Rehash(pH,8)) + || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) + ){ + pH->count = 0; + return data; + } + assert( pH->htsize>0 ); + new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); + if( new_elem==0 ) return data; + if( pH->copyKey && pKey!=0 ){ + new_elem->pKey = fts3HashMalloc( nKey ); + if( new_elem->pKey==0 ){ + fts3HashFree(new_elem); + return data; + } + memcpy((void*)new_elem->pKey, pKey, nKey); + }else{ + new_elem->pKey = (void*)pKey; + } + new_elem->nKey = nKey; + pH->count++; + assert( pH->htsize>0 ); + assert( (pH->htsize & (pH->htsize-1))==0 ); + h = hraw & (pH->htsize-1); + fts3HashInsertElement(pH, &pH->ht[h], new_elem); + new_elem->data = data; + return 0; +} + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_hash.c *******************************************/ +/************** Begin file fts3_porter.c *************************************/ +/* +** 2006 September 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** Implementation of the full-text-search tokenizer that implements +** a Porter stemmer. +*/ + +/* +** The code in this file is only compiled if: +** +** * The FTS3 module is being built as an extension +** (in which case SQLITE_CORE is not defined), or +** +** * The FTS3 module is being built into the core of +** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +*/ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include <assert.h> */ +/* #include <stdlib.h> */ +/* #include <stdio.h> */ +/* #include <string.h> */ + +/* #include "fts3_tokenizer.h" */ + +/* +** Class derived from sqlite3_tokenizer +*/ +typedef struct porter_tokenizer { + sqlite3_tokenizer base; /* Base class */ +} porter_tokenizer; + +/* +** Class derived from sqlite3_tokenizer_cursor +*/ +typedef struct porter_tokenizer_cursor { + sqlite3_tokenizer_cursor base; + const char *zInput; /* input we are tokenizing */ + int nInput; /* size of the input */ + int iOffset; /* current position in zInput */ + int iToken; /* index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAllocated; /* space allocated to zToken buffer */ +} porter_tokenizer_cursor; + + +/* +** Create a new tokenizer instance. +*/ +static int porterCreate( + int argc, const char * const *argv, + sqlite3_tokenizer **ppTokenizer +){ + porter_tokenizer *t; + + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); + + t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); + if( t==NULL ) return SQLITE_NOMEM; + memset(t, 0, sizeof(*t)); + *ppTokenizer = &t->base; + return SQLITE_OK; +} + +/* +** Destroy a tokenizer +*/ +static int porterDestroy(sqlite3_tokenizer *pTokenizer){ + sqlite3_free(pTokenizer); + return SQLITE_OK; +} + +/* +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is zInput[0..nInput-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. +*/ +static int porterOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, int nInput, /* String to be tokenized */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + porter_tokenizer_cursor *c; + + UNUSED_PARAMETER(pTokenizer); + + c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + if( c==NULL ) return SQLITE_NOMEM; + + c->zInput = zInput; + if( zInput==0 ){ + c->nInput = 0; + }else if( nInput<0 ){ + c->nInput = (int)strlen(zInput); + }else{ + c->nInput = nInput; + } + c->iOffset = 0; /* start tokenizing at the beginning */ + c->iToken = 0; + c->zToken = NULL; /* no space allocated, yet. */ + c->nAllocated = 0; + + *ppCursor = &c->base; + return SQLITE_OK; +} + +/* +** Close a tokenization cursor previously opened by a call to +** porterOpen() above. +*/ +static int porterClose(sqlite3_tokenizer_cursor *pCursor){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + sqlite3_free(c->zToken); + sqlite3_free(c); + return SQLITE_OK; +} +/* +** Vowel or consonant +*/ +static const char cType[] = { + 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, + 1, 1, 1, 2, 1 +}; + +/* +** isConsonant() and isVowel() determine if their first character in +** the string they point to is a consonant or a vowel, according +** to Porter ruls. +** +** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. +** 'Y' is a consonant unless it follows another consonant, +** in which case it is a vowel. +** +** In these routine, the letters are in reverse order. So the 'y' rule +** is that 'y' is a consonant unless it is followed by another +** consonent. +*/ +static int isVowel(const char*); +static int isConsonant(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return j; + return z[1]==0 || isVowel(z + 1); +} +static int isVowel(const char *z){ + int j; + char x = *z; + if( x==0 ) return 0; + assert( x>='a' && x<='z' ); + j = cType[x-'a']; + if( j<2 ) return 1-j; + return isConsonant(z + 1); +} + +/* +** Let any sequence of one or more vowels be represented by V and let +** C be sequence of one or more consonants. Then every word can be +** represented as: ** -** If parameter n is a negative number, then z is assumed to point to a -** nul-terminated string and the length is determined using strlen(). +** [C] (VC){m} [V] ** -** The first parameter, pTokenizer, is passed the fts3 tokenizer module to -** use to normalize query tokens while parsing the expression. The azCol[] -** array, which is assumed to contain nCol entries, should contain the names -** of each column in the target fts3 table, in order from left to right. -** Column names must be nul-terminated strings. +** In prose: A word is an optional consonant followed by zero or +** vowel-consonant pairs followed by an optional vowel. "m" is the +** number of vowel consonant pairs. This routine computes the value +** of m for the first i bytes of a word. ** -** The iDefaultCol parameter should be passed the index of the table column -** that appears on the left-hand-side of the MATCH operator (the default -** column to match against for tokens for which a column name is not explicitly -** specified as part of the query string), or -1 if tokens may by default -** match any table column. +** Return true if the m-value for z is 1 or more. In other words, +** return true if z contains at least one vowel that is followed +** by a consonant. +** +** In this routine z[] is in reverse order. So we are really looking +** for an instance of a consonant followed by a vowel. */ -SQLITE_PRIVATE int sqlite3Fts3ExprParse( - sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ - int iLangid, /* Language id for tokenizer */ - char **azCol, /* Array of column names for fts3 table */ - int bFts4, /* True to allow FTS4-only syntax */ - int nCol, /* Number of entries in azCol[] */ - int iDefaultCol, /* Default column to query */ - const char *z, int n, /* Text of MATCH query */ - Fts3Expr **ppExpr, /* OUT: Parsed query structure */ - char **pzErr /* OUT: Error message (sqlite3_malloc) */ -){ - int rc = fts3ExprParseUnbalanced( - pTokenizer, iLangid, azCol, bFts4, nCol, iDefaultCol, z, n, ppExpr - ); - - /* Rebalance the expression. And check that its depth does not exceed - ** SQLITE_FTS3_MAX_EXPR_DEPTH. */ - if( rc==SQLITE_OK && *ppExpr ){ - rc = fts3ExprBalance(ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); - if( rc==SQLITE_OK ){ - rc = fts3ExprCheckDepth(*ppExpr, SQLITE_FTS3_MAX_EXPR_DEPTH); - } - } +static int m_gt_0(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; +} - if( rc!=SQLITE_OK ){ - sqlite3Fts3ExprFree(*ppExpr); - *ppExpr = 0; - if( rc==SQLITE_TOOBIG ){ - sqlite3Fts3ErrMsg(pzErr, - "FTS expression tree is too large (maximum depth %d)", - SQLITE_FTS3_MAX_EXPR_DEPTH - ); - rc = SQLITE_ERROR; - }else if( rc==SQLITE_ERROR ){ - sqlite3Fts3ErrMsg(pzErr, "malformed MATCH expression: [%s]", z); - } - } +/* Like mgt0 above except we are looking for a value of m which is +** exactly 1 +*/ +static int m_eq_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 1; + while( isConsonant(z) ){ z++; } + return *z==0; +} - return rc; +/* Like mgt0 above except we are looking for a value of m>1 instead +** or m>0 +*/ +static int m_gt_1(const char *z){ + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + if( *z==0 ) return 0; + while( isVowel(z) ){ z++; } + if( *z==0 ) return 0; + while( isConsonant(z) ){ z++; } + return *z!=0; } /* -** Free a single node of an expression tree. +** Return TRUE if there is a vowel anywhere within z[0..n-1] */ -static void fts3FreeExprNode(Fts3Expr *p){ - assert( p->eType==FTSQUERY_PHRASE || p->pPhrase==0 ); - sqlite3Fts3EvalPhraseCleanup(p->pPhrase); - sqlite3_free(p->aMI); - sqlite3_free(p); +static int hasVowel(const char *z){ + while( isConsonant(z) ){ z++; } + return *z!=0; } /* -** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). +** Return TRUE if the word ends in a double consonant. ** -** This function would be simpler if it recursively called itself. But -** that would mean passing a sufficiently large expression to ExprParse() -** could cause a stack overflow. +** The text is reversed here. So we are really looking at +** the first two characters of z[]. */ -SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *pDel){ - Fts3Expr *p; - assert( pDel==0 || pDel->pParent==0 ); - for(p=pDel; p && (p->pLeft||p->pRight); p=(p->pLeft ? p->pLeft : p->pRight)){ - assert( p->pParent==0 || p==p->pParent->pRight || p==p->pParent->pLeft ); - } - while( p ){ - Fts3Expr *pParent = p->pParent; - fts3FreeExprNode(p); - if( pParent && p==pParent->pLeft && pParent->pRight ){ - p = pParent->pRight; - while( p && (p->pLeft || p->pRight) ){ - assert( p==p->pParent->pRight || p==p->pParent->pLeft ); - p = (p->pLeft ? p->pLeft : p->pRight); - } - }else{ - p = pParent; - } - } +static int doubleConsonant(const char *z){ + return isConsonant(z) && z[0]==z[1]; } -/**************************************************************************** -***************************************************************************** -** Everything after this point is just test code. +/* +** Return TRUE if the word ends with three letters which +** are consonant-vowel-consonent and where the final consonant +** is not 'w', 'x', or 'y'. +** +** The word is reversed here. So we are really checking the +** first three letters and the first one cannot be in [wxy]. */ - -#ifdef SQLITE_TEST - -/* #include <stdio.h> */ +static int star_oh(const char *z){ + return + isConsonant(z) && + z[0]!='w' && z[0]!='x' && z[0]!='y' && + isVowel(z+1) && + isConsonant(z+2); +} /* -** Function to query the hash-table of tokenizers (see README.tokenizers). +** If the word ends with zFrom and xCond() is true for the stem +** of the word that preceeds the zFrom ending, then change the +** ending to zTo. +** +** The input word *pz and zFrom are both in reverse order. zTo +** is in normal order. +** +** Return TRUE if zFrom matches. Return FALSE if zFrom does not +** match. Not that TRUE is returned even if xCond() fails and +** no substitution occurs. */ -static int queryTestTokenizer( - sqlite3 *db, - const char *zName, - const sqlite3_tokenizer_module **pp +static int stem( + char **pz, /* The word being stemmed (Reversed) */ + const char *zFrom, /* If the ending matches this... (Reversed) */ + const char *zTo, /* ... change the ending to this (not reversed) */ + int (*xCond)(const char*) /* Condition that must be true */ ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); - } + char *z = *pz; + while( *zFrom && *zFrom==*z ){ z++; zFrom++; } + if( *zFrom!=0 ) return 0; + if( xCond && !xCond(z) ) return 1; + while( *zTo ){ + *(--z) = *(zTo++); } - - return sqlite3_finalize(pStmt); + *pz = z; + return 1; } /* -** Return a pointer to a buffer containing a text representation of the -** expression passed as the first argument. The buffer is obtained from -** sqlite3_malloc(). It is the responsibility of the caller to use -** sqlite3_free() to release the memory. If an OOM condition is encountered, -** NULL is returned. -** -** If the second argument is not NULL, then its contents are prepended to -** the returned expression text and then freed using sqlite3_free(). +** This is the fallback stemmer used when the porter stemmer is +** inappropriate. The input word is copied into the output with +** US-ASCII case folding. If the input word is too long (more +** than 20 bytes if it contains no digits or more than 6 bytes if +** it contains digits) then word is truncated to 20 or 6 bytes +** by taking 10 or 3 bytes from the beginning and end. */ -static char *exprToString(Fts3Expr *pExpr, char *zBuf){ - if( pExpr==0 ){ - return sqlite3_mprintf(""); +static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, mx, j; + int hasDigit = 0; + for(i=0; i<nIn; i++){ + char c = zIn[i]; + if( c>='A' && c<='Z' ){ + zOut[i] = c - 'A' + 'a'; + }else{ + if( c>='0' && c<='9' ) hasDigit = 1; + zOut[i] = c; + } } - switch( pExpr->eType ){ - case FTSQUERY_PHRASE: { - Fts3Phrase *pPhrase = pExpr->pPhrase; - int i; - zBuf = sqlite3_mprintf( - "%zPHRASE %d 0", zBuf, pPhrase->iColumn); - for(i=0; zBuf && i<pPhrase->nToken; i++){ - zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, - pPhrase->aToken[i].n, pPhrase->aToken[i].z, - (pPhrase->aToken[i].isPrefix?"+":"") - ); - } - return zBuf; + mx = hasDigit ? 3 : 10; + if( nIn>mx*2 ){ + for(j=mx, i=nIn-mx; i<nIn; i++, j++){ + zOut[j] = zOut[i]; } - - case FTSQUERY_NEAR: - zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); - break; - case FTSQUERY_NOT: - zBuf = sqlite3_mprintf("%zNOT ", zBuf); - break; - case FTSQUERY_AND: - zBuf = sqlite3_mprintf("%zAND ", zBuf); - break; - case FTSQUERY_OR: - zBuf = sqlite3_mprintf("%zOR ", zBuf); - break; + i = j; } - - if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); - if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); - if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); - - if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); - if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); - - return zBuf; + zOut[i] = 0; + *pnOut = i; } + /* -** This is the implementation of a scalar SQL function used to test the -** expression parser. It should be called as follows: +** Stem the input word zIn[0..nIn-1]. Store the output in zOut. +** zOut is at least big enough to hold nIn bytes. Write the actual +** size of the output word (exclusive of the '\0' terminator) into *pnOut. ** -** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); +** Any upper-case characters in the US-ASCII character set ([A-Z]) +** are converted to lower case. Upper-case UTF characters are +** unchanged. ** -** The first argument, <tokenizer>, is the name of the fts3 tokenizer used -** to parse the query expression (see README.tokenizers). The second argument -** is the query expression to parse. Each subsequent argument is the name -** of a column of the fts3 table that the query expression may refer to. -** For example: +** Words that are longer than about 20 bytes are stemmed by retaining +** a few bytes from the beginning and the end of the word. If the +** word contains digits, 3 bytes are taken from the beginning and +** 3 bytes from the end. For long words without digits, 10 bytes +** are taken from each end. US-ASCII case folding still applies. +** +** If the input word contains not digits but does characters not +** in [a-zA-Z] then no stemming is attempted and this routine just +** copies the input into the input into the output with US-ASCII +** case folding. ** -** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); +** Stemming never increases the length of the word. So there is +** no chance of overflowing the zOut buffer. */ -static void fts3ExprTest( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - sqlite3_tokenizer_module const *pModule = 0; - sqlite3_tokenizer *pTokenizer = 0; - int rc; - char **azCol = 0; - const char *zExpr; - int nExpr; - int nCol; - int ii; - Fts3Expr *pExpr; - char *zBuf = 0; - sqlite3 *db = sqlite3_context_db_handle(context); - - if( argc<3 ){ - sqlite3_result_error(context, - "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 - ); +static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ + int i, j; + char zReverse[28]; + char *z, *z2; + if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){ + /* The word is too big or too small for the porter stemmer. + ** Fallback to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); return; } + for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){ + char c = zIn[i]; + if( c>='A' && c<='Z' ){ + zReverse[j] = c + 'a' - 'A'; + }else if( c>='a' && c<='z' ){ + zReverse[j] = c; + }else{ + /* The use of a character not in [a-zA-Z] means that we fallback + ** to the copy stemmer */ + copy_stemmer(zIn, nIn, zOut, pnOut); + return; + } + } + memset(&zReverse[sizeof(zReverse)-5], 0, 5); + z = &zReverse[j+1]; - rc = queryTestTokenizer(db, - (const char *)sqlite3_value_text(argv[0]), &pModule); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; - }else if( !pModule ){ - sqlite3_result_error(context, "No such tokenizer module", -1); - goto exprtest_out; + + /* Step 1a */ + if( z[0]=='s' ){ + if( + !stem(&z, "sess", "ss", 0) && + !stem(&z, "sei", "i", 0) && + !stem(&z, "ss", "ss", 0) + ){ + z++; + } } - rc = pModule->xCreate(0, 0, &pTokenizer); - assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); - if( rc==SQLITE_NOMEM ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; + /* Step 1b */ + z2 = z; + if( stem(&z, "dee", "ee", m_gt_0) ){ + /* Do nothing. The work was all in the test */ + }else if( + (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) + && z!=z2 + ){ + if( stem(&z, "ta", "ate", 0) || + stem(&z, "lb", "ble", 0) || + stem(&z, "zi", "ize", 0) ){ + /* Do nothing. The work was all in the test */ + }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ + z++; + }else if( m_eq_1(z) && star_oh(z) ){ + *(--z) = 'e'; + } } - pTokenizer->pModule = pModule; - zExpr = (const char *)sqlite3_value_text(argv[1]); - nExpr = sqlite3_value_bytes(argv[1]); - nCol = argc-2; - azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); - if( !azCol ){ - sqlite3_result_error_nomem(context); - goto exprtest_out; + /* Step 1c */ + if( z[0]=='y' && hasVowel(z+1) ){ + z[0] = 'i'; } - for(ii=0; ii<nCol; ii++){ - azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); + + /* Step 2 */ + switch( z[1] ){ + case 'a': + if( !stem(&z, "lanoita", "ate", m_gt_0) ){ + stem(&z, "lanoit", "tion", m_gt_0); + } + break; + case 'c': + if( !stem(&z, "icne", "ence", m_gt_0) ){ + stem(&z, "icna", "ance", m_gt_0); + } + break; + case 'e': + stem(&z, "rezi", "ize", m_gt_0); + break; + case 'g': + stem(&z, "igol", "log", m_gt_0); + break; + case 'l': + if( !stem(&z, "ilb", "ble", m_gt_0) + && !stem(&z, "illa", "al", m_gt_0) + && !stem(&z, "iltne", "ent", m_gt_0) + && !stem(&z, "ile", "e", m_gt_0) + ){ + stem(&z, "ilsuo", "ous", m_gt_0); + } + break; + case 'o': + if( !stem(&z, "noitazi", "ize", m_gt_0) + && !stem(&z, "noita", "ate", m_gt_0) + ){ + stem(&z, "rota", "ate", m_gt_0); + } + break; + case 's': + if( !stem(&z, "msila", "al", m_gt_0) + && !stem(&z, "ssenevi", "ive", m_gt_0) + && !stem(&z, "ssenluf", "ful", m_gt_0) + ){ + stem(&z, "ssensuo", "ous", m_gt_0); + } + break; + case 't': + if( !stem(&z, "itila", "al", m_gt_0) + && !stem(&z, "itivi", "ive", m_gt_0) + ){ + stem(&z, "itilib", "ble", m_gt_0); + } + break; } - if( sqlite3_user_data(context) ){ - char *zDummy = 0; - rc = sqlite3Fts3ExprParse( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy - ); - assert( rc==SQLITE_OK || pExpr==0 ); - sqlite3_free(zDummy); - }else{ - rc = fts3ExprParseUnbalanced( - pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr - ); + /* Step 3 */ + switch( z[0] ){ + case 'e': + if( !stem(&z, "etaci", "ic", m_gt_0) + && !stem(&z, "evita", "", m_gt_0) + ){ + stem(&z, "ezila", "al", m_gt_0); + } + break; + case 'i': + stem(&z, "itici", "ic", m_gt_0); + break; + case 'l': + if( !stem(&z, "laci", "ic", m_gt_0) ){ + stem(&z, "luf", "", m_gt_0); + } + break; + case 's': + stem(&z, "ssen", "", m_gt_0); + break; } - if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ - sqlite3Fts3ExprFree(pExpr); - sqlite3_result_error(context, "Error parsing expression", -1); - }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ - sqlite3_result_error_nomem(context); - }else{ - sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); - sqlite3_free(zBuf); + /* Step 4 */ + switch( z[1] ){ + case 'a': + if( z[0]=='l' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'c': + if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'e': + if( z[0]=='r' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'i': + if( z[0]=='c' && m_gt_1(z+2) ){ + z += 2; + } + break; + case 'l': + if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ + z += 4; + } + break; + case 'n': + if( z[0]=='t' ){ + if( z[2]=='a' ){ + if( m_gt_1(z+3) ){ + z += 3; + } + }else if( z[2]=='e' ){ + if( !stem(&z, "tneme", "", m_gt_1) + && !stem(&z, "tnem", "", m_gt_1) + ){ + stem(&z, "tne", "", m_gt_1); + } + } + } + break; + case 'o': + if( z[0]=='u' ){ + if( m_gt_1(z+2) ){ + z += 2; + } + }else if( z[3]=='s' || z[3]=='t' ){ + stem(&z, "noi", "", m_gt_1); + } + break; + case 's': + if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 't': + if( !stem(&z, "eta", "", m_gt_1) ){ + stem(&z, "iti", "", m_gt_1); + } + break; + case 'u': + if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ + z += 3; + } + break; + case 'v': + case 'z': + if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ + z += 3; + } + break; } - sqlite3Fts3ExprFree(pExpr); + /* Step 5a */ + if( z[0]=='e' ){ + if( m_gt_1(z+1) ){ + z++; + }else if( m_eq_1(z+1) && !star_oh(z+1) ){ + z++; + } + } -exprtest_out: - if( pModule && pTokenizer ){ - rc = pModule->xDestroy(pTokenizer); + /* Step 5b */ + if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ + z++; + } + + /* z[] is now the stemmed word in reverse order. Flip it back + ** around into forward order and return. + */ + *pnOut = i = (int)strlen(z); + zOut[i] = 0; + while( *z ){ + zOut[--i] = *(z++); } - sqlite3_free(azCol); } /* -** Register the query expression parser test function fts3_exprtest() -** with database connection db. +** Characters that can be part of a token. We assume any character +** whose value is greater than 0x80 (any UTF character) can be +** part of a token. In other words, delimiters all must have +** values of 0x7f or lower. */ -SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ - int rc = sqlite3_create_function( - db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 - ); - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function(db, "fts3_exprtest_rebalance", - -1, SQLITE_UTF8, (void *)1, fts3ExprTest, 0, 0 - ); +static const char porterIdChar[] = { +/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ +}; +#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) + +/* +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to porterOpen(). +*/ +static int porterNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ + const char **pzToken, /* OUT: *pzToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; + const char *z = c->zInput; + + while( c->iOffset<c->nInput ){ + int iStartOffset, ch; + + /* Scan past delimiter characters */ + while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int n = c->iOffset-iStartOffset; + if( n>c->nAllocated ){ + char *pNew; + c->nAllocated = n+20; + pNew = sqlite3_realloc(c->zToken, c->nAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->zToken = pNew; + } + porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); + *pzToken = c->zToken; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + return SQLITE_OK; + } } - return rc; + return SQLITE_DONE; +} + +/* +** The set of routines that implement the porter-stemmer tokenizer +*/ +static const sqlite3_tokenizer_module porterTokenizerModule = { + 0, + porterCreate, + porterDestroy, + porterOpen, + porterClose, + porterNext, + 0 +}; + +/* +** Allocate a new porter tokenizer. Return a pointer to the new +** tokenizer in *ppModule +*/ +SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &porterTokenizerModule; } -#endif #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3_expr.c *******************************************/ -/************** Begin file fts3_hash.c ***************************************/ +/************** End of fts3_porter.c *****************************************/ +/************** Begin file fts3_tokenizer.c **********************************/ /* -** 2001 September 22 +** 2007 June 22 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -140733,10 +143270,10 @@ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -************************************************************************* -** This is the implementation of generic hash-tables used in SQLite. -** We've modified it slightly to serve as a standalone hash table -** implementation for the full-text indexing module. +****************************************************************************** +** +** This is part of an SQLite module implementing full-text search. +** This particular file implements the generic tokenizer interface. */ /* @@ -140748,367 +143285,480 @@ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* #include <assert.h> */ -/* #include <stdlib.h> */ /* #include <string.h> */ - /* -** Malloc and Free functions +** Implementation of the SQL scalar function for accessing the underlying +** hash table. This function may be called as follows: +** +** SELECT <function-name>(<key-name>); +** SELECT <function-name>(<key-name>, <pointer>); +** +** where <function-name> is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). +** +** If the <pointer> argument is specified, it must be a blob value +** containing a pointer to be stored as the hash data corresponding +** to the string <key-name>. If <pointer> is not specified, then +** the string <key-name> must already exist in the has table. Otherwise, +** an error is returned. +** +** Whether or not the <pointer> argument is specified, the value returned +** is a blob containing the pointer stored as the hash data corresponding +** to string <key-name> (after the hash-table is updated, if applicable). */ -static void *fts3HashMalloc(int n){ - void *p = sqlite3_malloc(n); - if( p ){ - memset(p, 0, n); +static void scalarFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Fts3Hash *pHash; + void *pPtr = 0; + const unsigned char *zName; + int nName; + + assert( argc==1 || argc==2 ); + + pHash = (Fts3Hash *)sqlite3_user_data(context); + + zName = sqlite3_value_text(argv[0]); + nName = sqlite3_value_bytes(argv[0])+1; + + if( argc==2 ){ + void *pOld; + int n = sqlite3_value_bytes(argv[1]); + if( zName==0 || n!=sizeof(pPtr) ){ + sqlite3_result_error(context, "argument type mismatch", -1); + return; + } + pPtr = *(void **)sqlite3_value_blob(argv[1]); + pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); + if( pOld==pPtr ){ + sqlite3_result_error(context, "out of memory", -1); + return; + } + }else{ + if( zName ){ + pPtr = sqlite3Fts3HashFind(pHash, zName, nName); + } + if( !pPtr ){ + char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr, -1); + sqlite3_free(zErr); + return; + } } - return p; + + sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); } -static void fts3HashFree(void *p){ - sqlite3_free(p); + +SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ + static const char isFtsIdChar[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ + }; + return (c&0x80 || isFtsIdChar[(int)(c)]); } -/* Turn bulk memory into a hash table object by initializing the -** fields of the Hash structure. -** -** "pNew" is a pointer to the hash table that is to be initialized. -** keyClass is one of the constants -** FTS3_HASH_BINARY or FTS3_HASH_STRING. The value of keyClass -** determines what kind of key the hash table will use. "copyKey" is -** true if the hash table should make its own private copy of keys and -** false if it should just use the supplied pointer. -*/ -SQLITE_PRIVATE void sqlite3Fts3HashInit(Fts3Hash *pNew, char keyClass, char copyKey){ - assert( pNew!=0 ); - assert( keyClass>=FTS3_HASH_STRING && keyClass<=FTS3_HASH_BINARY ); - pNew->keyClass = keyClass; - pNew->copyKey = copyKey; - pNew->first = 0; - pNew->count = 0; - pNew->htsize = 0; - pNew->ht = 0; +SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ + const char *z1; + const char *z2 = 0; + + /* Find the start of the next token. */ + z1 = zStr; + while( z2==0 ){ + char c = *z1; + switch( c ){ + case '\0': return 0; /* No more tokens here */ + case '\'': + case '"': + case '`': { + z2 = z1; + while( *++z2 && (*z2!=c || *++z2==c) ); + break; + } + case '[': + z2 = &z1[1]; + while( *z2 && z2[0]!=']' ) z2++; + if( *z2 ) z2++; + break; + + default: + if( sqlite3Fts3IsIdChar(*z1) ){ + z2 = &z1[1]; + while( sqlite3Fts3IsIdChar(*z2) ) z2++; + }else{ + z1++; + } + } + } + + *pn = (int)(z2-z1); + return z1; } -/* Remove all entries from a hash table. Reclaim all memory. -** Call this routine to delete a hash table or to reset a hash table -** to the empty state. -*/ -SQLITE_PRIVATE void sqlite3Fts3HashClear(Fts3Hash *pH){ - Fts3HashElem *elem; /* For looping over all elements of the table */ +SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( + Fts3Hash *pHash, /* Tokenizer hash table */ + const char *zArg, /* Tokenizer name */ + sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ + char **pzErr /* OUT: Set to malloced error message */ +){ + int rc; + char *z = (char *)zArg; + int n = 0; + char *zCopy; + char *zEnd; /* Pointer to nul-term of zCopy */ + sqlite3_tokenizer_module *m; - assert( pH!=0 ); - elem = pH->first; - pH->first = 0; - fts3HashFree(pH->ht); - pH->ht = 0; - pH->htsize = 0; - while( elem ){ - Fts3HashElem *next_elem = elem->next; - if( pH->copyKey && elem->pKey ){ - fts3HashFree(elem->pKey); + zCopy = sqlite3_mprintf("%s", zArg); + if( !zCopy ) return SQLITE_NOMEM; + zEnd = &zCopy[strlen(zCopy)]; + + z = (char *)sqlite3Fts3NextToken(zCopy, &n); + if( z==0 ){ + assert( n==0 ); + z = zCopy; + } + z[n] = '\0'; + sqlite3Fts3Dequote(z); + + m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); + if( !m ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); + rc = SQLITE_ERROR; + }else{ + char const **aArg = 0; + int iArg = 0; + z = &z[n+1]; + while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){ + int nNew = sizeof(char *)*(iArg+1); + char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew); + if( !aNew ){ + sqlite3_free(zCopy); + sqlite3_free((void *)aArg); + return SQLITE_NOMEM; + } + aArg = aNew; + aArg[iArg++] = z; + z[n] = '\0'; + sqlite3Fts3Dequote(z); + z = &z[n+1]; } - fts3HashFree(elem); - elem = next_elem; + rc = m->xCreate(iArg, aArg, ppTok); + assert( rc!=SQLITE_OK || *ppTok ); + if( rc!=SQLITE_OK ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); + }else{ + (*ppTok)->pModule = m; + } + sqlite3_free((void *)aArg); + } + + sqlite3_free(zCopy); + return rc; +} + + +#ifdef SQLITE_TEST + +#include <tcl.h> +/* #include <string.h> */ + +/* +** Implementation of a special SQL scalar function for testing tokenizers +** designed to be used in concert with the Tcl testing framework. This +** function must be called with two or more arguments: +** +** SELECT <function-name>(<key-name>, ..., <input-string>); +** +** where <function-name> is the name passed as the second argument +** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') +** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). +** +** The return value is a string that may be interpreted as a Tcl +** list. For each token in the <input-string>, three elements are +** added to the returned list. The first is the token position, the +** second is the token text (folded, stemmed, etc.) and the third is the +** substring of <input-string> associated with the token. For example, +** using the built-in "simple" tokenizer: +** +** SELECT fts_tokenizer_test('simple', 'I don't see how'); +** +** will return the string: +** +** "{0 i I 1 dont don't 2 see see 3 how how}" +** +*/ +static void testFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv +){ + Fts3Hash *pHash; + sqlite3_tokenizer_module *p; + sqlite3_tokenizer *pTokenizer = 0; + sqlite3_tokenizer_cursor *pCsr = 0; + + const char *zErr = 0; + + const char *zName; + int nName; + const char *zInput; + int nInput; + + const char *azArg[64]; + + const char *zToken; + int nToken = 0; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int i; + + Tcl_Obj *pRet; + + if( argc<2 ){ + sqlite3_result_error(context, "insufficient arguments", -1); + return; + } + + nName = sqlite3_value_bytes(argv[0]); + zName = (const char *)sqlite3_value_text(argv[0]); + nInput = sqlite3_value_bytes(argv[argc-1]); + zInput = (const char *)sqlite3_value_text(argv[argc-1]); + + pHash = (Fts3Hash *)sqlite3_user_data(context); + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + + if( !p ){ + char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); + sqlite3_result_error(context, zErr2, -1); + sqlite3_free(zErr2); + return; + } + + pRet = Tcl_NewObj(); + Tcl_IncrRefCount(pRet); + + for(i=1; i<argc-1; i++){ + azArg[i-1] = (const char *)sqlite3_value_text(argv[i]); + } + + if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){ + zErr = "error in xCreate()"; + goto finish; + } + pTokenizer->pModule = p; + if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ + zErr = "error in xOpen()"; + goto finish; } - pH->count = 0; -} -/* -** Hash and comparison functions when the mode is FTS3_HASH_STRING -*/ -static int fts3StrHash(const void *pKey, int nKey){ - const char *z = (const char *)pKey; - unsigned h = 0; - if( nKey<=0 ) nKey = (int) strlen(z); - while( nKey > 0 ){ - h = (h<<3) ^ h ^ *z++; - nKey--; + while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ + Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); + zToken = &zInput[iStart]; + nToken = iEnd-iStart; + Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); } - return (int)(h & 0x7fffffff); -} -static int fts3StrCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return strncmp((const char*)pKey1,(const char*)pKey2,n1); -} -/* -** Hash and comparison functions when the mode is FTS3_HASH_BINARY -*/ -static int fts3BinHash(const void *pKey, int nKey){ - int h = 0; - const char *z = (const char *)pKey; - while( nKey-- > 0 ){ - h = (h<<3) ^ h ^ *(z++); + if( SQLITE_OK!=p->xClose(pCsr) ){ + zErr = "error in xClose()"; + goto finish; } - return h & 0x7fffffff; -} -static int fts3BinCompare(const void *pKey1, int n1, const void *pKey2, int n2){ - if( n1!=n2 ) return 1; - return memcmp(pKey1,pKey2,n1); -} - -/* -** Return a pointer to the appropriate hash function given the key class. -** -** The C syntax in this function definition may be unfamilar to some -** programmers, so we provide the following additional explanation: -** -** The name of the function is "ftsHashFunction". The function takes a -** single parameter "keyClass". The return value of ftsHashFunction() -** is a pointer to another function. Specifically, the return value -** of ftsHashFunction() is a pointer to a function that takes two parameters -** with types "const void*" and "int" and returns an "int". -*/ -static int (*ftsHashFunction(int keyClass))(const void*,int){ - if( keyClass==FTS3_HASH_STRING ){ - return &fts3StrHash; - }else{ - assert( keyClass==FTS3_HASH_BINARY ); - return &fts3BinHash; + if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ + zErr = "error in xDestroy()"; + goto finish; } -} -/* -** Return a pointer to the appropriate hash function given the key class. -** -** For help in interpreted the obscure C code in the function definition, -** see the header comment on the previous function. -*/ -static int (*ftsCompareFunction(int keyClass))(const void*,int,const void*,int){ - if( keyClass==FTS3_HASH_STRING ){ - return &fts3StrCompare; +finish: + if( zErr ){ + sqlite3_result_error(context, zErr, -1); }else{ - assert( keyClass==FTS3_HASH_BINARY ); - return &fts3BinCompare; + sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); } + Tcl_DecrRefCount(pRet); } -/* Link an element into the hash table -*/ -static void fts3HashInsertElement( - Fts3Hash *pH, /* The complete hash table */ - struct _fts3ht *pEntry, /* The entry into which pNew is inserted */ - Fts3HashElem *pNew /* The element to be inserted */ +static +int registerTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module *p ){ - Fts3HashElem *pHead; /* First element already in pEntry */ - pHead = pEntry->chain; - if( pHead ){ - pNew->next = pHead; - pNew->prev = pHead->prev; - if( pHead->prev ){ pHead->prev->next = pNew; } - else { pH->first = pNew; } - pHead->prev = pNew; - }else{ - pNew->next = pH->first; - if( pH->first ){ pH->first->prev = pNew; } - pNew->prev = 0; - pH->first = pNew; - } - pEntry->count++; - pEntry->chain = pNew; -} + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } -/* Resize the hash table so that it cantains "new_size" buckets. -** "new_size" must be a power of 2. The hash table might fail -** to resize if sqliteMalloc() fails. -** -** Return non-zero if a memory allocation error occurs. -*/ -static int fts3Rehash(Fts3Hash *pH, int new_size){ - struct _fts3ht *new_ht; /* The new hash table */ - Fts3HashElem *elem, *next_elem; /* For looping over existing elements */ - int (*xHash)(const void*,int); /* The hash function */ + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); + sqlite3_step(pStmt); - assert( (new_size & (new_size-1))==0 ); - new_ht = (struct _fts3ht *)fts3HashMalloc( new_size*sizeof(struct _fts3ht) ); - if( new_ht==0 ) return 1; - fts3HashFree(pH->ht); - pH->ht = new_ht; - pH->htsize = new_size; - xHash = ftsHashFunction(pH->keyClass); - for(elem=pH->first, pH->first=0; elem; elem = next_elem){ - int h = (*xHash)(elem->pKey, elem->nKey) & (new_size-1); - next_elem = elem->next; - fts3HashInsertElement(pH, &new_ht[h], elem); - } - return 0; + return sqlite3_finalize(pStmt); } -/* This function (for internal use only) locates an element in an -** hash table that matches the given key. The hash for this key has -** already been computed and is passed as the 4th parameter. -*/ -static Fts3HashElem *fts3FindElementByHash( - const Fts3Hash *pH, /* The pH to be searched */ - const void *pKey, /* The key we are searching for */ - int nKey, - int h /* The hash for this key. */ +static +int queryTokenizer( + sqlite3 *db, + char *zName, + const sqlite3_tokenizer_module **pp ){ - Fts3HashElem *elem; /* Used to loop thru the element list */ - int count; /* Number of elements left to test */ - int (*xCompare)(const void*,int,const void*,int); /* comparison function */ + int rc; + sqlite3_stmt *pStmt; + const char zSql[] = "SELECT fts3_tokenizer(?)"; - if( pH->ht ){ - struct _fts3ht *pEntry = &pH->ht[h]; - elem = pEntry->chain; - count = pEntry->count; - xCompare = ftsCompareFunction(pH->keyClass); - while( count-- && elem ){ - if( (*xCompare)(elem->pKey,elem->nKey,pKey,nKey)==0 ){ - return elem; - } - elem = elem->next; + *pp = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc!=SQLITE_OK ){ + return rc; + } + + sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ + memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); } } - return 0; + + return sqlite3_finalize(pStmt); } -/* Remove a single entry from the hash table given a pointer to that -** element and a hash on the element's key. +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); + +/* +** Implementation of the scalar function fts3_tokenizer_internal_test(). +** This function is used for testing only, it is not included in the +** build unless SQLITE_TEST is defined. +** +** The purpose of this is to test that the fts3_tokenizer() function +** can be used as designed by the C-code in the queryTokenizer and +** registerTokenizer() functions above. These two functions are repeated +** in the README.tokenizer file as an example, so it is important to +** test them. +** +** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar +** function with no arguments. An assert() will fail if a problem is +** detected. i.e.: +** +** SELECT fts3_tokenizer_internal_test(); +** */ -static void fts3RemoveElementByHash( - Fts3Hash *pH, /* The pH containing "elem" */ - Fts3HashElem* elem, /* The element to be removed from the pH */ - int h /* Hash value for the element */ +static void intTestFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv ){ - struct _fts3ht *pEntry; - if( elem->prev ){ - elem->prev->next = elem->next; - }else{ - pH->first = elem->next; - } - if( elem->next ){ - elem->next->prev = elem->prev; - } - pEntry = &pH->ht[h]; - if( pEntry->chain==elem ){ - pEntry->chain = elem->next; - } - pEntry->count--; - if( pEntry->count<=0 ){ - pEntry->chain = 0; - } - if( pH->copyKey && elem->pKey ){ - fts3HashFree(elem->pKey); - } - fts3HashFree( elem ); - pH->count--; - if( pH->count<=0 ){ - assert( pH->first==0 ); - assert( pH->count==0 ); - fts3HashClear(pH); - } -} + int rc; + const sqlite3_tokenizer_module *p1; + const sqlite3_tokenizer_module *p2; + sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); -SQLITE_PRIVATE Fts3HashElem *sqlite3Fts3HashFindElem( - const Fts3Hash *pH, - const void *pKey, - int nKey -){ - int h; /* A hash on key */ - int (*xHash)(const void*,int); /* The hash function */ + UNUSED_PARAMETER(argc); + UNUSED_PARAMETER(argv); - if( pH==0 || pH->ht==0 ) return 0; - xHash = ftsHashFunction(pH->keyClass); - assert( xHash!=0 ); - h = (*xHash)(pKey,nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - return fts3FindElementByHash(pH,pKey,nKey, h & (pH->htsize-1)); -} + /* Test the query function */ + sqlite3Fts3SimpleTokenizerModule(&p1); + rc = queryTokenizer(db, "simple", &p2); + assert( rc==SQLITE_OK ); + assert( p1==p2 ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_ERROR ); + assert( p2==0 ); + assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); -/* -** Attempt to locate an element of the hash table pH with a key -** that matches pKey,nKey. Return the data for this element if it is -** found, or NULL if there is no match. -*/ -SQLITE_PRIVATE void *sqlite3Fts3HashFind(const Fts3Hash *pH, const void *pKey, int nKey){ - Fts3HashElem *pElem; /* The element that matches key (if any) */ + /* Test the storage function */ + rc = registerTokenizer(db, "nosuchtokenizer", p1); + assert( rc==SQLITE_OK ); + rc = queryTokenizer(db, "nosuchtokenizer", &p2); + assert( rc==SQLITE_OK ); + assert( p2==p1 ); - pElem = sqlite3Fts3HashFindElem(pH, pKey, nKey); - return pElem ? pElem->data : 0; + sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); } -/* Insert an element into the hash table pH. The key is pKey,nKey -** and the data is "data". +#endif + +/* +** Set up SQL objects in database db used to access the contents of +** the hash table pointed to by argument pHash. The hash table must +** been initialized to use string keys, and to take a private copy +** of the key when a value is inserted. i.e. by a call similar to: ** -** If no element exists with a matching key, then a new -** element is created. A copy of the key is made if the copyKey -** flag is set. NULL is returned. +** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); ** -** If another element already exists with the same key, then the -** new data replaces the old data and the old data is returned. -** The key is not copied in this instance. If a malloc fails, then -** the new data is returned and the hash table is unchanged. +** This function adds a scalar function (see header comment above +** scalarFunc() in this file for details) and, if ENABLE_TABLE is +** defined at compilation time, a temporary virtual table (see header +** comment above struct HashTableVtab) to the database schema. Both +** provide read/write access to the contents of *pHash. ** -** If the "data" parameter to this function is NULL, then the -** element corresponding to "key" is removed from the hash table. +** The third argument to this function, zName, is used as the name +** of both the scalar and, if created, the virtual table. */ -SQLITE_PRIVATE void *sqlite3Fts3HashInsert( - Fts3Hash *pH, /* The hash table to insert into */ - const void *pKey, /* The key */ - int nKey, /* Number of bytes in the key */ - void *data /* The data */ +SQLITE_PRIVATE int sqlite3Fts3InitHashTable( + sqlite3 *db, + Fts3Hash *pHash, + const char *zName ){ - int hraw; /* Raw hash value of the key */ - int h; /* the hash of the key modulo hash table size */ - Fts3HashElem *elem; /* Used to loop thru the element list */ - Fts3HashElem *new_elem; /* New element added to the pH */ - int (*xHash)(const void*,int); /* The hash function */ + int rc = SQLITE_OK; + void *p = (void *)pHash; + const int any = SQLITE_ANY; - assert( pH!=0 ); - xHash = ftsHashFunction(pH->keyClass); - assert( xHash!=0 ); - hraw = (*xHash)(pKey, nKey); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - elem = fts3FindElementByHash(pH,pKey,nKey,h); - if( elem ){ - void *old_data = elem->data; - if( data==0 ){ - fts3RemoveElementByHash(pH,elem,h); - }else{ - elem->data = data; - } - return old_data; +#ifdef SQLITE_TEST + char *zTest = 0; + char *zTest2 = 0; + void *pdb = (void *)db; + zTest = sqlite3_mprintf("%s_test", zName); + zTest2 = sqlite3_mprintf("%s_internal_test", zName); + if( !zTest || !zTest2 ){ + rc = SQLITE_NOMEM; } - if( data==0 ) return 0; - if( (pH->htsize==0 && fts3Rehash(pH,8)) - || (pH->count>=pH->htsize && fts3Rehash(pH, pH->htsize*2)) - ){ - pH->count = 0; - return data; +#endif + + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0); } - assert( pH->htsize>0 ); - new_elem = (Fts3HashElem*)fts3HashMalloc( sizeof(Fts3HashElem) ); - if( new_elem==0 ) return data; - if( pH->copyKey && pKey!=0 ){ - new_elem->pKey = fts3HashMalloc( nKey ); - if( new_elem->pKey==0 ){ - fts3HashFree(new_elem); - return data; - } - memcpy((void*)new_elem->pKey, pKey, nKey); - }else{ - new_elem->pKey = (void*)pKey; + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); } - new_elem->nKey = nKey; - pH->count++; - assert( pH->htsize>0 ); - assert( (pH->htsize & (pH->htsize-1))==0 ); - h = hraw & (pH->htsize-1); - fts3HashInsertElement(pH, &pH->ht[h], new_elem); - new_elem->data = data; - return 0; +#ifdef SQLITE_TEST + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); + } + if( SQLITE_OK==rc ){ + rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); + } +#endif + +#ifdef SQLITE_TEST + sqlite3_free(zTest); + sqlite3_free(zTest2); +#endif + + return rc; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3_hash.c *******************************************/ -/************** Begin file fts3_porter.c *************************************/ +/************** End of fts3_tokenizer.c **************************************/ +/************** Begin file fts3_tokenizer1.c *********************************/ /* -** 2006 September 30 +** 2006 Oct 10 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -141117,9 +143767,9 @@ SQLITE_PRIVATE void *sqlite3Fts3HashInsert( ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -************************************************************************* -** Implementation of the full-text-search tokenizer that implements -** a Porter stemmer. +****************************************************************************** +** +** Implementation of the "simple" full-text-search tokenizer. */ /* @@ -141131,6 +143781,7 @@ SQLITE_PRIVATE void *sqlite3Fts3HashInsert( ** * The FTS3 module is being built into the core of ** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). */ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) /* #include <assert.h> */ @@ -141138,43 +143789,68 @@ SQLITE_PRIVATE void *sqlite3Fts3HashInsert( /* #include <stdio.h> */ /* #include <string.h> */ +/* #include "fts3_tokenizer.h" */ -/* -** Class derived from sqlite3_tokenizer -*/ -typedef struct porter_tokenizer { - sqlite3_tokenizer base; /* Base class */ -} porter_tokenizer; +typedef struct simple_tokenizer { + sqlite3_tokenizer base; + char delim[128]; /* flag ASCII delimiters */ +} simple_tokenizer; -/* -** Class derived from sqlite3_tokenizer_cursor -*/ -typedef struct porter_tokenizer_cursor { +typedef struct simple_tokenizer_cursor { sqlite3_tokenizer_cursor base; - const char *zInput; /* input we are tokenizing */ - int nInput; /* size of the input */ - int iOffset; /* current position in zInput */ + const char *pInput; /* input we are tokenizing */ + int nBytes; /* size of the input */ + int iOffset; /* current position in pInput */ int iToken; /* index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAllocated; /* space allocated to zToken buffer */ -} porter_tokenizer_cursor; + char *pToken; /* storage for current token */ + int nTokenAllocated; /* space allocated to zToken buffer */ +} simple_tokenizer_cursor; + +static int simpleDelim(simple_tokenizer *t, unsigned char c){ + return c<0x80 && t->delim[c]; +} +static int fts3_isalnum(int x){ + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); +} /* ** Create a new tokenizer instance. */ -static int porterCreate( +static int simpleCreate( int argc, const char * const *argv, sqlite3_tokenizer **ppTokenizer ){ - porter_tokenizer *t; - - UNUSED_PARAMETER(argc); - UNUSED_PARAMETER(argv); + simple_tokenizer *t; - t = (porter_tokenizer *) sqlite3_malloc(sizeof(*t)); + t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); if( t==NULL ) return SQLITE_NOMEM; memset(t, 0, sizeof(*t)); + + /* TODO(shess) Delimiters need to remain the same from run to run, + ** else we need to reindex. One solution would be a meta-table to + ** track such information in the database, then we'd only want this + ** information on the initial create. + */ + if( argc>1 ){ + int i, n = (int)strlen(argv[1]); + for(i=0; i<n; i++){ + unsigned char ch = argv[1][i]; + /* We explicitly don't support UTF-8 delimiters for now. */ + if( ch>=0x80 ){ + sqlite3_free(t); + return SQLITE_ERROR; + } + t->delim[ch] = 1; + } + } else { + /* Mark non-alphanumeric ASCII characters as delimiters */ + int i; + for(i=1; i<0x80; i++){ + t->delim[i] = !fts3_isalnum(i) ? -1 : 0; + } + } + *ppTokenizer = &t->base; return SQLITE_OK; } @@ -141182,41 +143858,41 @@ static int porterCreate( /* ** Destroy a tokenizer */ -static int porterDestroy(sqlite3_tokenizer *pTokenizer){ +static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ sqlite3_free(pTokenizer); return SQLITE_OK; } /* ** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is zInput[0..nInput-1]. A cursor +** string to be tokenized is pInput[0..nBytes-1]. A cursor ** used to incrementally tokenize this string is returned in ** *ppCursor. */ -static int porterOpen( +static int simpleOpen( sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, int nInput, /* String to be tokenized */ + const char *pInput, int nBytes, /* String to be tokenized */ sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ ){ - porter_tokenizer_cursor *c; + simple_tokenizer_cursor *c; UNUSED_PARAMETER(pTokenizer); - c = (porter_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); + c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); if( c==NULL ) return SQLITE_NOMEM; - c->zInput = zInput; - if( zInput==0 ){ - c->nInput = 0; - }else if( nInput<0 ){ - c->nInput = (int)strlen(zInput); + c->pInput = pInput; + if( pInput==0 ){ + c->nBytes = 0; + }else if( nBytes<0 ){ + c->nBytes = (int)strlen(pInput); }else{ - c->nInput = nInput; + c->nBytes = nBytes; } c->iOffset = 0; /* start tokenizing at the beginning */ c->iToken = 0; - c->zToken = NULL; /* no space allocated, yet. */ - c->nAllocated = 0; + c->pToken = NULL; /* no space allocated, yet. */ + c->nTokenAllocated = 0; *ppCursor = &c->base; return SQLITE_OK; @@ -141224,554 +143900,559 @@ static int porterOpen( /* ** Close a tokenization cursor previously opened by a call to -** porterOpen() above. +** simpleOpen() above. */ -static int porterClose(sqlite3_tokenizer_cursor *pCursor){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - sqlite3_free(c->zToken); +static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + sqlite3_free(c->pToken); sqlite3_free(c); return SQLITE_OK; } + /* -** Vowel or consonant +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). */ -static const char cType[] = { - 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, - 1, 1, 1, 2, 1 +static int simpleNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; + simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; + unsigned char *p = (unsigned char *)c->pInput; + + while( c->iOffset<c->nBytes ){ + int iStartOffset; + + /* Scan past delimiter characters */ + while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + /* Count non-delimiter characters. */ + iStartOffset = c->iOffset; + while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ + c->iOffset++; + } + + if( c->iOffset>iStartOffset ){ + int i, n = c->iOffset-iStartOffset; + if( n>c->nTokenAllocated ){ + char *pNew; + c->nTokenAllocated = n+20; + pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); + if( !pNew ) return SQLITE_NOMEM; + c->pToken = pNew; + } + for(i=0; i<n; i++){ + /* TODO(shess) This needs expansion to handle UTF-8 + ** case-insensitivity. + */ + unsigned char ch = p[iStartOffset+i]; + c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); + } + *ppToken = c->pToken; + *pnBytes = n; + *piStartOffset = iStartOffset; + *piEndOffset = c->iOffset; + *piPosition = c->iToken++; + + return SQLITE_OK; + } + } + return SQLITE_DONE; +} + +/* +** The set of routines that implement the simple tokenizer +*/ +static const sqlite3_tokenizer_module simpleTokenizerModule = { + 0, + simpleCreate, + simpleDestroy, + simpleOpen, + simpleClose, + simpleNext, + 0, }; /* -** isConsonant() and isVowel() determine if their first character in -** the string they point to is a consonant or a vowel, according -** to Porter ruls. -** -** A consonate is any letter other than 'a', 'e', 'i', 'o', or 'u'. -** 'Y' is a consonant unless it follows another consonant, -** in which case it is a vowel. -** -** In these routine, the letters are in reverse order. So the 'y' rule -** is that 'y' is a consonant unless it is followed by another -** consonent. +** Allocate a new simple tokenizer. Return a pointer to the new +** tokenizer in *ppModule */ -static int isVowel(const char*); -static int isConsonant(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return j; - return z[1]==0 || isVowel(z + 1); -} -static int isVowel(const char *z){ - int j; - char x = *z; - if( x==0 ) return 0; - assert( x>='a' && x<='z' ); - j = cType[x-'a']; - if( j<2 ) return 1-j; - return isConsonant(z + 1); +SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( + sqlite3_tokenizer_module const**ppModule +){ + *ppModule = &simpleTokenizerModule; } +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ + +/************** End of fts3_tokenizer1.c *************************************/ +/************** Begin file fts3_tokenize_vtab.c ******************************/ /* -** Let any sequence of one or more vowels be represented by V and let -** C be sequence of one or more consonants. Then every word can be -** represented as: +** 2013 Apr 22 ** -** [C] (VC){m} [V] +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** In prose: A word is an optional consonant followed by zero or -** vowel-consonant pairs followed by an optional vowel. "m" is the -** number of vowel consonant pairs. This routine computes the value -** of m for the first i bytes of a word. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. ** -** Return true if the m-value for z is 1 or more. In other words, -** return true if z contains at least one vowel that is followed -** by a consonant. +****************************************************************************** +** +** This file contains code for the "fts3tokenize" virtual table module. +** An fts3tokenize virtual table is created as follows: +** +** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize( +** <tokenizer-name>, <arg-1>, ... +** ); +** +** The table created has the following schema: +** +** CREATE TABLE <tbl>(input, token, start, end, position) +** +** When queried, the query must include a WHERE clause of type: +** +** input = <string> +** +** The virtual table module tokenizes this <string>, using the FTS3 +** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE +** statement and returns one row for each token in the result. With +** fields set as follows: +** +** input: Always set to a copy of <string> +** token: A token from the input. +** start: Byte offset of the token within the input <string>. +** end: Byte offset of the byte immediately following the end of the +** token within the input string. +** pos: Token offset of token within input. ** -** In this routine z[] is in reverse order. So we are really looking -** for an instance of a consonant followed by a vowel. */ -static int m_gt_0(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) -/* Like mgt0 above except we are looking for a value of m which is -** exactly 1 -*/ -static int m_eq_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 1; - while( isConsonant(z) ){ z++; } - return *z==0; -} +/* #include <string.h> */ +/* #include <assert.h> */ -/* Like mgt0 above except we are looking for a value of m>1 instead -** or m>0 -*/ -static int m_gt_1(const char *z){ - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - if( *z==0 ) return 0; - while( isVowel(z) ){ z++; } - if( *z==0 ) return 0; - while( isConsonant(z) ){ z++; } - return *z!=0; -} +typedef struct Fts3tokTable Fts3tokTable; +typedef struct Fts3tokCursor Fts3tokCursor; /* -** Return TRUE if there is a vowel anywhere within z[0..n-1] +** Virtual table structure. */ -static int hasVowel(const char *z){ - while( isConsonant(z) ){ z++; } - return *z!=0; -} +struct Fts3tokTable { + sqlite3_vtab base; /* Base class used by SQLite core */ + const sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer *pTok; +}; /* -** Return TRUE if the word ends in a double consonant. -** -** The text is reversed here. So we are really looking at -** the first two characters of z[]. +** Virtual table cursor structure. */ -static int doubleConsonant(const char *z){ - return isConsonant(z) && z[0]==z[1]; -} +struct Fts3tokCursor { + sqlite3_vtab_cursor base; /* Base class used by SQLite core */ + char *zInput; /* Input string */ + sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ + int iRowid; /* Current 'rowid' value */ + const char *zToken; /* Current 'token' value */ + int nToken; /* Size of zToken in bytes */ + int iStart; /* Current 'start' value */ + int iEnd; /* Current 'end' value */ + int iPos; /* Current 'pos' value */ +}; /* -** Return TRUE if the word ends with three letters which -** are consonant-vowel-consonent and where the final consonant -** is not 'w', 'x', or 'y'. -** -** The word is reversed here. So we are really checking the -** first three letters and the first one cannot be in [wxy]. +** Query FTS for the tokenizer implementation named zName. */ -static int star_oh(const char *z){ - return - isConsonant(z) && - z[0]!='w' && z[0]!='x' && z[0]!='y' && - isVowel(z+1) && - isConsonant(z+2); +static int fts3tokQueryTokenizer( + Fts3Hash *pHash, + const char *zName, + const sqlite3_tokenizer_module **pp, + char **pzErr +){ + sqlite3_tokenizer_module *p; + int nName = (int)strlen(zName); + + p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); + if( !p ){ + sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); + return SQLITE_ERROR; + } + + *pp = p; + return SQLITE_OK; } /* -** If the word ends with zFrom and xCond() is true for the stem -** of the word that preceeds the zFrom ending, then change the -** ending to zTo. -** -** The input word *pz and zFrom are both in reverse order. zTo -** is in normal order. +** The second argument, argv[], is an array of pointers to nul-terminated +** strings. This function makes a copy of the array and strings into a +** single block of memory. It then dequotes any of the strings that appear +** to be quoted. ** -** Return TRUE if zFrom matches. Return FALSE if zFrom does not -** match. Not that TRUE is returned even if xCond() fails and -** no substitution occurs. +** If successful, output parameter *pazDequote is set to point at the +** array of dequoted strings and SQLITE_OK is returned. The caller is +** responsible for eventually calling sqlite3_free() to free the array +** in this case. Or, if an error occurs, an SQLite error code is returned. +** The final value of *pazDequote is undefined in this case. */ -static int stem( - char **pz, /* The word being stemmed (Reversed) */ - const char *zFrom, /* If the ending matches this... (Reversed) */ - const char *zTo, /* ... change the ending to this (not reversed) */ - int (*xCond)(const char*) /* Condition that must be true */ +static int fts3tokDequoteArray( + int argc, /* Number of elements in argv[] */ + const char * const *argv, /* Input array */ + char ***pazDequote /* Output array */ ){ - char *z = *pz; - while( *zFrom && *zFrom==*z ){ z++; zFrom++; } - if( *zFrom!=0 ) return 0; - if( xCond && !xCond(z) ) return 1; - while( *zTo ){ - *(--z) = *(zTo++); - } - *pz = z; - return 1; -} + int rc = SQLITE_OK; /* Return code */ + if( argc==0 ){ + *pazDequote = 0; + }else{ + int i; + int nByte = 0; + char **azDequote; -/* -** This is the fallback stemmer used when the porter stemmer is -** inappropriate. The input word is copied into the output with -** US-ASCII case folding. If the input word is too long (more -** than 20 bytes if it contains no digits or more than 6 bytes if -** it contains digits) then word is truncated to 20 or 6 bytes -** by taking 10 or 3 bytes from the beginning and end. -*/ -static void copy_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, mx, j; - int hasDigit = 0; - for(i=0; i<nIn; i++){ - char c = zIn[i]; - if( c>='A' && c<='Z' ){ - zOut[i] = c - 'A' + 'a'; - }else{ - if( c>='0' && c<='9' ) hasDigit = 1; - zOut[i] = c; + for(i=0; i<argc; i++){ + nByte += (int)(strlen(argv[i]) + 1); } - } - mx = hasDigit ? 3 : 10; - if( nIn>mx*2 ){ - for(j=mx, i=nIn-mx; i<nIn; i++, j++){ - zOut[j] = zOut[i]; + + *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); + if( azDequote==0 ){ + rc = SQLITE_NOMEM; + }else{ + char *pSpace = (char *)&azDequote[argc]; + for(i=0; i<argc; i++){ + int n = (int)strlen(argv[i]); + azDequote[i] = pSpace; + memcpy(pSpace, argv[i], n+1); + sqlite3Fts3Dequote(pSpace); + pSpace += (n+1); + } } - i = j; } - zOut[i] = 0; - *pnOut = i; + + return rc; } +/* +** Schema of the tokenizer table. +*/ +#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)" /* -** Stem the input word zIn[0..nIn-1]. Store the output in zOut. -** zOut is at least big enough to hold nIn bytes. Write the actual -** size of the output word (exclusive of the '\0' terminator) into *pnOut. -** -** Any upper-case characters in the US-ASCII character set ([A-Z]) -** are converted to lower case. Upper-case UTF characters are -** unchanged. -** -** Words that are longer than about 20 bytes are stemmed by retaining -** a few bytes from the beginning and the end of the word. If the -** word contains digits, 3 bytes are taken from the beginning and -** 3 bytes from the end. For long words without digits, 10 bytes -** are taken from each end. US-ASCII case folding still applies. -** -** If the input word contains not digits but does characters not -** in [a-zA-Z] then no stemming is attempted and this routine just -** copies the input into the input into the output with US-ASCII -** case folding. +** This function does all the work for both the xConnect and xCreate methods. +** These tables have no persistent representation of their own, so xConnect +** and xCreate are identical operations. ** -** Stemming never increases the length of the word. So there is -** no chance of overflowing the zOut buffer. +** argv[0]: module name +** argv[1]: database name +** argv[2]: table name +** argv[3]: first argument (tokenizer name) */ -static void porter_stemmer(const char *zIn, int nIn, char *zOut, int *pnOut){ - int i, j; - char zReverse[28]; - char *z, *z2; - if( nIn<3 || nIn>=(int)sizeof(zReverse)-7 ){ - /* The word is too big or too small for the porter stemmer. - ** Fallback to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - for(i=0, j=sizeof(zReverse)-6; i<nIn; i++, j--){ - char c = zIn[i]; - if( c>='A' && c<='Z' ){ - zReverse[j] = c + 'a' - 'A'; - }else if( c>='a' && c<='z' ){ - zReverse[j] = c; - }else{ - /* The use of a character not in [a-zA-Z] means that we fallback - ** to the copy stemmer */ - copy_stemmer(zIn, nIn, zOut, pnOut); - return; - } - } - memset(&zReverse[sizeof(zReverse)-5], 0, 5); - z = &zReverse[j+1]; - +static int fts3tokConnectMethod( + sqlite3 *db, /* Database connection */ + void *pHash, /* Hash table of tokenizers */ + int argc, /* Number of elements in argv array */ + const char * const *argv, /* xCreate/xConnect argument array */ + sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ + char **pzErr /* OUT: sqlite3_malloc'd error message */ +){ + Fts3tokTable *pTab = 0; + const sqlite3_tokenizer_module *pMod = 0; + sqlite3_tokenizer *pTok = 0; + int rc; + char **azDequote = 0; + int nDequote; - /* Step 1a */ - if( z[0]=='s' ){ - if( - !stem(&z, "sess", "ss", 0) && - !stem(&z, "sei", "i", 0) && - !stem(&z, "ss", "ss", 0) - ){ - z++; - } - } + rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); + if( rc!=SQLITE_OK ) return rc; - /* Step 1b */ - z2 = z; - if( stem(&z, "dee", "ee", m_gt_0) ){ - /* Do nothing. The work was all in the test */ - }else if( - (stem(&z, "gni", "", hasVowel) || stem(&z, "de", "", hasVowel)) - && z!=z2 - ){ - if( stem(&z, "ta", "ate", 0) || - stem(&z, "lb", "ble", 0) || - stem(&z, "zi", "ize", 0) ){ - /* Do nothing. The work was all in the test */ - }else if( doubleConsonant(z) && (*z!='l' && *z!='s' && *z!='z') ){ - z++; - }else if( m_eq_1(z) && star_oh(z) ){ - *(--z) = 'e'; - } - } + nDequote = argc-3; + rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); - /* Step 1c */ - if( z[0]=='y' && hasVowel(z+1) ){ - z[0] = 'i'; + if( rc==SQLITE_OK ){ + const char *zModule; + if( nDequote<1 ){ + zModule = "simple"; + }else{ + zModule = azDequote[0]; + } + rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); } - /* Step 2 */ - switch( z[1] ){ - case 'a': - if( !stem(&z, "lanoita", "ate", m_gt_0) ){ - stem(&z, "lanoit", "tion", m_gt_0); - } - break; - case 'c': - if( !stem(&z, "icne", "ence", m_gt_0) ){ - stem(&z, "icna", "ance", m_gt_0); - } - break; - case 'e': - stem(&z, "rezi", "ize", m_gt_0); - break; - case 'g': - stem(&z, "igol", "log", m_gt_0); - break; - case 'l': - if( !stem(&z, "ilb", "ble", m_gt_0) - && !stem(&z, "illa", "al", m_gt_0) - && !stem(&z, "iltne", "ent", m_gt_0) - && !stem(&z, "ile", "e", m_gt_0) - ){ - stem(&z, "ilsuo", "ous", m_gt_0); - } - break; - case 'o': - if( !stem(&z, "noitazi", "ize", m_gt_0) - && !stem(&z, "noita", "ate", m_gt_0) - ){ - stem(&z, "rota", "ate", m_gt_0); - } - break; - case 's': - if( !stem(&z, "msila", "al", m_gt_0) - && !stem(&z, "ssenevi", "ive", m_gt_0) - && !stem(&z, "ssenluf", "ful", m_gt_0) - ){ - stem(&z, "ssensuo", "ous", m_gt_0); - } - break; - case 't': - if( !stem(&z, "itila", "al", m_gt_0) - && !stem(&z, "itivi", "ive", m_gt_0) - ){ - stem(&z, "itilib", "ble", m_gt_0); - } - break; + assert( (rc==SQLITE_OK)==(pMod!=0) ); + if( rc==SQLITE_OK ){ + const char * const *azArg = (const char * const *)&azDequote[1]; + rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); } - /* Step 3 */ - switch( z[0] ){ - case 'e': - if( !stem(&z, "etaci", "ic", m_gt_0) - && !stem(&z, "evita", "", m_gt_0) - ){ - stem(&z, "ezila", "al", m_gt_0); - } - break; - case 'i': - stem(&z, "itici", "ic", m_gt_0); - break; - case 'l': - if( !stem(&z, "laci", "ic", m_gt_0) ){ - stem(&z, "luf", "", m_gt_0); - } - break; - case 's': - stem(&z, "ssen", "", m_gt_0); - break; + if( rc==SQLITE_OK ){ + pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); + if( pTab==0 ){ + rc = SQLITE_NOMEM; + } } - /* Step 4 */ - switch( z[1] ){ - case 'a': - if( z[0]=='l' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'c': - if( z[0]=='e' && z[2]=='n' && (z[3]=='a' || z[3]=='e') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'e': - if( z[0]=='r' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'i': - if( z[0]=='c' && m_gt_1(z+2) ){ - z += 2; - } - break; - case 'l': - if( z[0]=='e' && z[2]=='b' && (z[3]=='a' || z[3]=='i') && m_gt_1(z+4) ){ - z += 4; - } - break; - case 'n': - if( z[0]=='t' ){ - if( z[2]=='a' ){ - if( m_gt_1(z+3) ){ - z += 3; - } - }else if( z[2]=='e' ){ - if( !stem(&z, "tneme", "", m_gt_1) - && !stem(&z, "tnem", "", m_gt_1) - ){ - stem(&z, "tne", "", m_gt_1); - } - } - } - break; - case 'o': - if( z[0]=='u' ){ - if( m_gt_1(z+2) ){ - z += 2; - } - }else if( z[3]=='s' || z[3]=='t' ){ - stem(&z, "noi", "", m_gt_1); - } - break; - case 's': - if( z[0]=='m' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 't': - if( !stem(&z, "eta", "", m_gt_1) ){ - stem(&z, "iti", "", m_gt_1); - } - break; - case 'u': - if( z[0]=='s' && z[2]=='o' && m_gt_1(z+3) ){ - z += 3; - } - break; - case 'v': - case 'z': - if( z[0]=='e' && z[2]=='i' && m_gt_1(z+3) ){ - z += 3; - } - break; + if( rc==SQLITE_OK ){ + memset(pTab, 0, sizeof(Fts3tokTable)); + pTab->pMod = pMod; + pTab->pTok = pTok; + *ppVtab = &pTab->base; + }else{ + if( pTok ){ + pMod->xDestroy(pTok); + } } - /* Step 5a */ - if( z[0]=='e' ){ - if( m_gt_1(z+1) ){ - z++; - }else if( m_eq_1(z+1) && !star_oh(z+1) ){ - z++; + sqlite3_free(azDequote); + return rc; +} + +/* +** This function does the work for both the xDisconnect and xDestroy methods. +** These tables have no persistent representation of their own, so xDisconnect +** and xDestroy are identical operations. +*/ +static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ + Fts3tokTable *pTab = (Fts3tokTable *)pVtab; + + pTab->pMod->xDestroy(pTab->pTok); + sqlite3_free(pTab); + return SQLITE_OK; +} + +/* +** xBestIndex - Analyze a WHERE and ORDER BY clause. +*/ +static int fts3tokBestIndexMethod( + sqlite3_vtab *pVTab, + sqlite3_index_info *pInfo +){ + int i; + UNUSED_PARAMETER(pVTab); + + for(i=0; i<pInfo->nConstraint; i++){ + if( pInfo->aConstraint[i].usable + && pInfo->aConstraint[i].iColumn==0 + && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + pInfo->idxNum = 1; + pInfo->aConstraintUsage[i].argvIndex = 1; + pInfo->aConstraintUsage[i].omit = 1; + pInfo->estimatedCost = 1; + return SQLITE_OK; } } - /* Step 5b */ - if( m_gt_1(z) && z[0]=='l' && z[1]=='l' ){ - z++; + pInfo->idxNum = 0; + assert( pInfo->estimatedCost>1000000.0 ); + + return SQLITE_OK; +} + +/* +** xOpen - Open a cursor. +*/ +static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ + Fts3tokCursor *pCsr; + UNUSED_PARAMETER(pVTab); + + pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); + if( pCsr==0 ){ + return SQLITE_NOMEM; } + memset(pCsr, 0, sizeof(Fts3tokCursor)); - /* z[] is now the stemmed word in reverse order. Flip it back - ** around into forward order and return. - */ - *pnOut = i = (int)strlen(z); - zOut[i] = 0; - while( *z ){ - zOut[--i] = *(z++); + *ppCsr = (sqlite3_vtab_cursor *)pCsr; + return SQLITE_OK; +} + +/* +** Reset the tokenizer cursor passed as the only argument. As if it had +** just been returned by fts3tokOpenMethod(). +*/ +static void fts3tokResetCursor(Fts3tokCursor *pCsr){ + if( pCsr->pCsr ){ + Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); + pTab->pMod->xClose(pCsr->pCsr); + pCsr->pCsr = 0; } + sqlite3_free(pCsr->zInput); + pCsr->zInput = 0; + pCsr->zToken = 0; + pCsr->nToken = 0; + pCsr->iStart = 0; + pCsr->iEnd = 0; + pCsr->iPos = 0; + pCsr->iRowid = 0; } /* -** Characters that can be part of a token. We assume any character -** whose value is greater than 0x80 (any UTF character) can be -** part of a token. In other words, delimiters all must have -** values of 0x7f or lower. +** xClose - Close a cursor. */ -static const char porterIdChar[] = { -/* x0 x1 x2 x3 x4 x5 x6 x7 x8 x9 xA xB xC xD xE xF */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ -}; -#define isDelim(C) (((ch=C)&0x80)==0 && (ch<0x30 || !porterIdChar[ch-0x30])) +static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + fts3tokResetCursor(pCsr); + sqlite3_free(pCsr); + return SQLITE_OK; +} /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to porterOpen(). +** xNext - Advance the cursor to the next row, if any. */ -static int porterNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by porterOpen */ - const char **pzToken, /* OUT: *pzToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ -){ - porter_tokenizer_cursor *c = (porter_tokenizer_cursor *) pCursor; - const char *z = c->zInput; +static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + int rc; /* Return code */ - while( c->iOffset<c->nInput ){ - int iStartOffset, ch; + pCsr->iRowid++; + rc = pTab->pMod->xNext(pCsr->pCsr, + &pCsr->zToken, &pCsr->nToken, + &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos + ); - /* Scan past delimiter characters */ - while( c->iOffset<c->nInput && isDelim(z[c->iOffset]) ){ - c->iOffset++; - } + if( rc!=SQLITE_OK ){ + fts3tokResetCursor(pCsr); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffset<c->nInput && !isDelim(z[c->iOffset]) ){ - c->iOffset++; - } + return rc; +} - if( c->iOffset>iStartOffset ){ - int n = c->iOffset-iStartOffset; - if( n>c->nAllocated ){ - char *pNew; - c->nAllocated = n+20; - pNew = sqlite3_realloc(c->zToken, c->nAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->zToken = pNew; +/* +** xFilter - Initialize a cursor to point at the start of its data. +*/ +static int fts3tokFilterMethod( + sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ + int idxNum, /* Strategy index */ + const char *idxStr, /* Unused */ + int nVal, /* Number of elements in apVal */ + sqlite3_value **apVal /* Arguments for the indexing scheme */ +){ + int rc = SQLITE_ERROR; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); + UNUSED_PARAMETER(idxStr); + UNUSED_PARAMETER(nVal); + + fts3tokResetCursor(pCsr); + if( idxNum==1 ){ + const char *zByte = (const char *)sqlite3_value_text(apVal[0]); + int nByte = sqlite3_value_bytes(apVal[0]); + pCsr->zInput = sqlite3_malloc(nByte+1); + if( pCsr->zInput==0 ){ + rc = SQLITE_NOMEM; + }else{ + memcpy(pCsr->zInput, zByte, nByte); + pCsr->zInput[nByte] = 0; + rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); + if( rc==SQLITE_OK ){ + pCsr->pCsr->pTokenizer = pTab->pTok; } - porter_stemmer(&z[iStartOffset], n, c->zToken, pnBytes); - *pzToken = c->zToken; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; - return SQLITE_OK; } } - return SQLITE_DONE; + + if( rc!=SQLITE_OK ) return rc; + return fts3tokNextMethod(pCursor); } /* -** The set of routines that implement the porter-stemmer tokenizer +** xEof - Return true if the cursor is at EOF, or false otherwise. */ -static const sqlite3_tokenizer_module porterTokenizerModule = { - 0, - porterCreate, - porterDestroy, - porterOpen, - porterClose, - porterNext, - 0 -}; +static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + return (pCsr->zToken==0); +} /* -** Allocate a new porter tokenizer. Return a pointer to the new -** tokenizer in *ppModule +** xColumn - Return a column value. */ -SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( - sqlite3_tokenizer_module const**ppModule +static int fts3tokColumnMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ + int iCol /* Index of column to read value from */ ){ - *ppModule = &porterTokenizerModule; + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + + /* CREATE TABLE x(input, token, start, end, position) */ + switch( iCol ){ + case 0: + sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); + break; + case 1: + sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); + break; + case 2: + sqlite3_result_int(pCtx, pCsr->iStart); + break; + case 3: + sqlite3_result_int(pCtx, pCsr->iEnd); + break; + default: + assert( iCol==4 ); + sqlite3_result_int(pCtx, pCsr->iPos); + break; + } + return SQLITE_OK; +} + +/* +** xRowid - Return the current rowid for the cursor. +*/ +static int fts3tokRowidMethod( + sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ + sqlite_int64 *pRowid /* OUT: Rowid value */ +){ + Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + *pRowid = (sqlite3_int64)pCsr->iRowid; + return SQLITE_OK; +} + +/* +** Register the fts3tok module with database connection db. Return SQLITE_OK +** if successful or an error code if sqlite3_create_module() fails. +*/ +SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ + static const sqlite3_module fts3tok_module = { + 0, /* iVersion */ + fts3tokConnectMethod, /* xCreate */ + fts3tokConnectMethod, /* xConnect */ + fts3tokBestIndexMethod, /* xBestIndex */ + fts3tokDisconnectMethod, /* xDisconnect */ + fts3tokDisconnectMethod, /* xDestroy */ + fts3tokOpenMethod, /* xOpen */ + fts3tokCloseMethod, /* xClose */ + fts3tokFilterMethod, /* xFilter */ + fts3tokNextMethod, /* xNext */ + fts3tokEofMethod, /* xEof */ + fts3tokColumnMethod, /* xColumn */ + fts3tokRowidMethod, /* xRowid */ + 0, /* xUpdate */ + 0, /* xBegin */ + 0, /* xSync */ + 0, /* xCommit */ + 0, /* xRollback */ + 0, /* xFindFunction */ + 0, /* xRename */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ + }; + int rc; /* Return code */ + + rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); + return rc; } #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -/************** End of fts3_porter.c *****************************************/ -/************** Begin file fts3_tokenizer.c **********************************/ +/************** End of fts3_tokenize_vtab.c **********************************/ +/************** Begin file fts3_write.c **************************************/ /* -** 2007 June 22 +** 2009 Oct 23 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -141782,860 +144463,1147 @@ SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule( ** ****************************************************************************** ** -** This is part of an SQLite module implementing full-text search. -** This particular file implements the generic tokenizer interface. +** This file is part of the SQLite FTS3 extension module. Specifically, +** this file contains code to insert, update and delete rows from FTS3 +** tables. It also contains code to merge FTS3 b-tree segments. Some +** of the sub-routines used to merge segments are also used by the query +** code in fts3.c. */ -/* -** The code in this file is only compiled if: -** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or -** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). -*/ +/* #include "fts3Int.h" */ #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) -/* #include <assert.h> */ /* #include <string.h> */ +/* #include <assert.h> */ +/* #include <stdlib.h> */ + + +#define FTS_MAX_APPENDABLE_HEIGHT 16 /* -** Implementation of the SQL scalar function for accessing the underlying -** hash table. This function may be called as follows: -** -** SELECT <function-name>(<key-name>); -** SELECT <function-name>(<key-name>, <pointer>); +** When full-text index nodes are loaded from disk, the buffer that they +** are loaded into has the following number of bytes of padding at the end +** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer +** of 920 bytes is allocated for it. ** -** where <function-name> is the name passed as the second argument -** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer'). +** This means that if we have a pointer into a buffer containing node data, +** it is always safe to read up to two varints from it without risking an +** overread, even if the node data is corrupted. +*/ +#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) + +/* +** Under certain circumstances, b-tree nodes (doclists) can be loaded into +** memory incrementally instead of all at once. This can be a big performance +** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext() +** method before retrieving all query results (as may happen, for example, +** if a query has a LIMIT clause). ** -** If the <pointer> argument is specified, it must be a blob value -** containing a pointer to be stored as the hash data corresponding -** to the string <key-name>. If <pointer> is not specified, then -** the string <key-name> must already exist in the has table. Otherwise, -** an error is returned. +** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD +** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes. +** The code is written so that the hard lower-limit for each of these values +** is 1. Clearly such small values would be inefficient, but can be useful +** for testing purposes. ** -** Whether or not the <pointer> argument is specified, the value returned -** is a blob containing the pointer stored as the hash data corresponding -** to string <key-name> (after the hash-table is updated, if applicable). +** If this module is built with SQLITE_TEST defined, these constants may +** be overridden at runtime for testing purposes. File fts3_test.c contains +** a Tcl interface to read and write the values. */ -static void scalarFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - Fts3Hash *pHash; - void *pPtr = 0; - const unsigned char *zName; - int nName; - - assert( argc==1 || argc==2 ); - - pHash = (Fts3Hash *)sqlite3_user_data(context); - - zName = sqlite3_value_text(argv[0]); - nName = sqlite3_value_bytes(argv[0])+1; - - if( argc==2 ){ - void *pOld; - int n = sqlite3_value_bytes(argv[1]); - if( zName==0 || n!=sizeof(pPtr) ){ - sqlite3_result_error(context, "argument type mismatch", -1); - return; - } - pPtr = *(void **)sqlite3_value_blob(argv[1]); - pOld = sqlite3Fts3HashInsert(pHash, (void *)zName, nName, pPtr); - if( pOld==pPtr ){ - sqlite3_result_error(context, "out of memory", -1); - return; - } - }else{ - if( zName ){ - pPtr = sqlite3Fts3HashFind(pHash, zName, nName); - } - if( !pPtr ){ - char *zErr = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr, -1); - sqlite3_free(zErr); - return; - } - } +#ifdef SQLITE_TEST +int test_fts3_node_chunksize = (4*1024); +int test_fts3_node_chunk_threshold = (4*1024)*4; +# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize +# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold +#else +# define FTS3_NODE_CHUNKSIZE (4*1024) +# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) +#endif - sqlite3_result_blob(context, (void *)&pPtr, sizeof(pPtr), SQLITE_TRANSIENT); -} +/* +** The two values that may be meaningfully bound to the :1 parameter in +** statements SQL_REPLACE_STAT and SQL_SELECT_STAT. +*/ +#define FTS_STAT_DOCTOTAL 0 +#define FTS_STAT_INCRMERGEHINT 1 +#define FTS_STAT_AUTOINCRMERGE 2 -SQLITE_PRIVATE int sqlite3Fts3IsIdChar(char c){ - static const char isFtsIdChar[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 1x */ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 2x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 3x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 5x */ - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 6x */ - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7x */ - }; - return (c&0x80 || isFtsIdChar[(int)(c)]); +/* +** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic +** and incremental merge operation that takes place. This is used for +** debugging FTS only, it should not usually be turned on in production +** systems. +*/ +#ifdef FTS3_LOG_MERGES +static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){ + sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); } +#else +#define fts3LogMerge(x, y) +#endif -SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *zStr, int *pn){ - const char *z1; - const char *z2 = 0; - /* Find the start of the next token. */ - z1 = zStr; - while( z2==0 ){ - char c = *z1; - switch( c ){ - case '\0': return 0; /* No more tokens here */ - case '\'': - case '"': - case '`': { - z2 = z1; - while( *++z2 && (*z2!=c || *++z2==c) ); - break; - } - case '[': - z2 = &z1[1]; - while( *z2 && z2[0]!=']' ) z2++; - if( *z2 ) z2++; - break; +typedef struct PendingList PendingList; +typedef struct SegmentNode SegmentNode; +typedef struct SegmentWriter SegmentWriter; - default: - if( sqlite3Fts3IsIdChar(*z1) ){ - z2 = &z1[1]; - while( sqlite3Fts3IsIdChar(*z2) ) z2++; - }else{ - z1++; - } - } - } +/* +** An instance of the following data structure is used to build doclists +** incrementally. See function fts3PendingListAppend() for details. +*/ +struct PendingList { + int nData; + char *aData; + int nSpace; + sqlite3_int64 iLastDocid; + sqlite3_int64 iLastCol; + sqlite3_int64 iLastPos; +}; - *pn = (int)(z2-z1); - return z1; -} -SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( - Fts3Hash *pHash, /* Tokenizer hash table */ - const char *zArg, /* Tokenizer name */ - sqlite3_tokenizer **ppTok, /* OUT: Tokenizer (if applicable) */ - char **pzErr /* OUT: Set to malloced error message */ -){ - int rc; - char *z = (char *)zArg; - int n = 0; - char *zCopy; - char *zEnd; /* Pointer to nul-term of zCopy */ - sqlite3_tokenizer_module *m; +/* +** Each cursor has a (possibly empty) linked list of the following objects. +*/ +struct Fts3DeferredToken { + Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ + int iCol; /* Column token must occur in */ + Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ + PendingList *pList; /* Doclist is assembled here */ +}; - zCopy = sqlite3_mprintf("%s", zArg); - if( !zCopy ) return SQLITE_NOMEM; - zEnd = &zCopy[strlen(zCopy)]; +/* +** An instance of this structure is used to iterate through the terms on +** a contiguous set of segment b-tree leaf nodes. Although the details of +** this structure are only manipulated by code in this file, opaque handles +** of type Fts3SegReader* are also used by code in fts3.c to iterate through +** terms when querying the full-text index. See functions: +** +** sqlite3Fts3SegReaderNew() +** sqlite3Fts3SegReaderFree() +** sqlite3Fts3SegReaderIterate() +** +** Methods used to manipulate Fts3SegReader structures: +** +** fts3SegReaderNext() +** fts3SegReaderFirstDocid() +** fts3SegReaderNextDocid() +*/ +struct Fts3SegReader { + int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ + u8 bLookup; /* True for a lookup only */ + u8 rootOnly; /* True for a root-only reader */ - z = (char *)sqlite3Fts3NextToken(zCopy, &n); - if( z==0 ){ - assert( n==0 ); - z = zCopy; - } - z[n] = '\0'; - sqlite3Fts3Dequote(z); + sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ + sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ + sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ + sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ - m = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); - if( !m ){ - sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", z); - rc = SQLITE_ERROR; - }else{ - char const **aArg = 0; - int iArg = 0; - z = &z[n+1]; - while( z<zEnd && (NULL!=(z = (char *)sqlite3Fts3NextToken(z, &n))) ){ - int nNew = sizeof(char *)*(iArg+1); - char const **aNew = (const char **)sqlite3_realloc((void *)aArg, nNew); - if( !aNew ){ - sqlite3_free(zCopy); - sqlite3_free((void *)aArg); - return SQLITE_NOMEM; - } - aArg = aNew; - aArg[iArg++] = z; - z[n] = '\0'; - sqlite3Fts3Dequote(z); - z = &z[n+1]; - } - rc = m->xCreate(iArg, aArg, ppTok); - assert( rc!=SQLITE_OK || *ppTok ); - if( rc!=SQLITE_OK ){ - sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer"); - }else{ - (*ppTok)->pModule = m; - } - sqlite3_free((void *)aArg); - } + char *aNode; /* Pointer to node data (or NULL) */ + int nNode; /* Size of buffer at aNode (or 0) */ + int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ + sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ - sqlite3_free(zCopy); - return rc; -} + Fts3HashElem **ppNextElem; + /* Variables set by fts3SegReaderNext(). These may be read directly + ** by the caller. They are valid from the time SegmentReaderNew() returns + ** until SegmentReaderNext() returns something other than SQLITE_OK + ** (i.e. SQLITE_DONE). + */ + int nTerm; /* Number of bytes in current term */ + char *zTerm; /* Pointer to current term */ + int nTermAlloc; /* Allocated size of zTerm buffer */ + char *aDoclist; /* Pointer to doclist of current entry */ + int nDoclist; /* Size of doclist in current entry */ -#ifdef SQLITE_TEST + /* The following variables are used by fts3SegReaderNextDocid() to iterate + ** through the current doclist (aDoclist/nDoclist). + */ + char *pOffsetList; + int nOffsetList; /* For descending pending seg-readers only */ + sqlite3_int64 iDocid; +}; -#include <tcl.h> -/* #include <string.h> */ +#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) +#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) /* -** Implementation of a special SQL scalar function for testing tokenizers -** designed to be used in concert with the Tcl testing framework. This -** function must be called with two or more arguments: -** -** SELECT <function-name>(<key-name>, ..., <input-string>); -** -** where <function-name> is the name passed as the second argument -** to the sqlite3Fts3InitHashTable() function (e.g. 'fts3_tokenizer') -** concatenated with the string '_test' (e.g. 'fts3_tokenizer_test'). +** An instance of this structure is used to create a segment b-tree in the +** database. The internal details of this type are only accessed by the +** following functions: ** -** The return value is a string that may be interpreted as a Tcl -** list. For each token in the <input-string>, three elements are -** added to the returned list. The first is the token position, the -** second is the token text (folded, stemmed, etc.) and the third is the -** substring of <input-string> associated with the token. For example, -** using the built-in "simple" tokenizer: +** fts3SegWriterAdd() +** fts3SegWriterFlush() +** fts3SegWriterFree() +*/ +struct SegmentWriter { + SegmentNode *pTree; /* Pointer to interior tree structure */ + sqlite3_int64 iFirst; /* First slot in %_segments written */ + sqlite3_int64 iFree; /* Next free slot in %_segments */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nSize; /* Size of allocation at aData */ + int nData; /* Bytes of data in aData */ + char *aData; /* Pointer to block from malloc() */ + i64 nLeafData; /* Number of bytes of leaf data written */ +}; + +/* +** Type SegmentNode is used by the following three functions to create +** the interior part of the segment b+-tree structures (everything except +** the leaf nodes). These functions and type are only ever used by code +** within the fts3SegWriterXXX() family of functions described above. ** -** SELECT fts_tokenizer_test('simple', 'I don't see how'); +** fts3NodeAddTerm() +** fts3NodeWrite() +** fts3NodeFree() ** -** will return the string: +** When a b+tree is written to the database (either as a result of a merge +** or the pending-terms table being flushed), leaves are written into the +** database file as soon as they are completely populated. The interior of +** the tree is assembled in memory and written out only once all leaves have +** been populated and stored. This is Ok, as the b+-tree fanout is usually +** very large, meaning that the interior of the tree consumes relatively +** little memory. +*/ +struct SegmentNode { + SegmentNode *pParent; /* Parent node (or NULL for root node) */ + SegmentNode *pRight; /* Pointer to right-sibling */ + SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ + int nEntry; /* Number of terms written to node so far */ + char *zTerm; /* Pointer to previous term buffer */ + int nTerm; /* Number of bytes in zTerm */ + int nMalloc; /* Size of malloc'd buffer at zMalloc */ + char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ + int nData; /* Bytes of valid data so far */ + char *aData; /* Node data */ +}; + +/* +** Valid values for the second argument to fts3SqlStmt(). +*/ +#define SQL_DELETE_CONTENT 0 +#define SQL_IS_EMPTY 1 +#define SQL_DELETE_ALL_CONTENT 2 +#define SQL_DELETE_ALL_SEGMENTS 3 +#define SQL_DELETE_ALL_SEGDIR 4 +#define SQL_DELETE_ALL_DOCSIZE 5 +#define SQL_DELETE_ALL_STAT 6 +#define SQL_SELECT_CONTENT_BY_ROWID 7 +#define SQL_NEXT_SEGMENT_INDEX 8 +#define SQL_INSERT_SEGMENTS 9 +#define SQL_NEXT_SEGMENTS_ID 10 +#define SQL_INSERT_SEGDIR 11 +#define SQL_SELECT_LEVEL 12 +#define SQL_SELECT_LEVEL_RANGE 13 +#define SQL_SELECT_LEVEL_COUNT 14 +#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 +#define SQL_DELETE_SEGDIR_LEVEL 16 +#define SQL_DELETE_SEGMENTS_RANGE 17 +#define SQL_CONTENT_INSERT 18 +#define SQL_DELETE_DOCSIZE 19 +#define SQL_REPLACE_DOCSIZE 20 +#define SQL_SELECT_DOCSIZE 21 +#define SQL_SELECT_STAT 22 +#define SQL_REPLACE_STAT 23 + +#define SQL_SELECT_ALL_PREFIX_LEVEL 24 +#define SQL_DELETE_ALL_TERMS_SEGDIR 25 +#define SQL_DELETE_SEGDIR_RANGE 26 +#define SQL_SELECT_ALL_LANGID 27 +#define SQL_FIND_MERGE_LEVEL 28 +#define SQL_MAX_LEAF_NODE_ESTIMATE 29 +#define SQL_DELETE_SEGDIR_ENTRY 30 +#define SQL_SHIFT_SEGDIR_ENTRY 31 +#define SQL_SELECT_SEGDIR 32 +#define SQL_CHOMP_SEGDIR 33 +#define SQL_SEGMENT_IS_APPENDABLE 34 +#define SQL_SELECT_INDEXES 35 +#define SQL_SELECT_MXLEVEL 36 + +#define SQL_SELECT_LEVEL_RANGE2 37 +#define SQL_UPDATE_LEVEL_IDX 38 +#define SQL_UPDATE_LEVEL 39 + +/* +** This function is used to obtain an SQLite prepared statement handle +** for the statement identified by the second argument. If successful, +** *pp is set to the requested statement handle and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned and *pp is set to 0. ** -** "{0 i I 1 dont don't 2 see see 3 how how}" -** +** If argument apVal is not NULL, then it must point to an array with +** at least as many entries as the requested statement has bound +** parameters. The values are bound to the statements parameters before +** returning. */ -static void testFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv +static int fts3SqlStmt( + Fts3Table *p, /* Virtual table handle */ + int eStmt, /* One of the SQL_XXX constants above */ + sqlite3_stmt **pp, /* OUT: Statement handle */ + sqlite3_value **apVal /* Values to bind to statement */ ){ - Fts3Hash *pHash; - sqlite3_tokenizer_module *p; - sqlite3_tokenizer *pTokenizer = 0; - sqlite3_tokenizer_cursor *pCsr = 0; + const char *azSql[] = { +/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", +/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", +/* 2 */ "DELETE FROM %Q.'%q_content'", +/* 3 */ "DELETE FROM %Q.'%q_segments'", +/* 4 */ "DELETE FROM %Q.'%q_segdir'", +/* 5 */ "DELETE FROM %Q.'%q_docsize'", +/* 6 */ "DELETE FROM %Q.'%q_stat'", +/* 7 */ "SELECT %s WHERE rowid=?", +/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", +/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", +/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", +/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", - const char *zErr = 0; + /* Return segments in order from oldest to newest.*/ +/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", +/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" + "ORDER BY level DESC, idx ASC", - const char *zName; - int nName; - const char *zInput; - int nInput; +/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", +/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", - const char *azArg[64]; +/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", +/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", +/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", +/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", +/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", +/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", +/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", +/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", +/* 24 */ "", +/* 25 */ "", - const char *zToken; - int nToken = 0; - int iStart = 0; - int iEnd = 0; - int iPos = 0; - int i; +/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", +/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", - Tcl_Obj *pRet; +/* This statement is used to determine which level to read the input from +** when performing an incremental merge. It returns the absolute level number +** of the oldest level in the db that contains at least ? segments. Or, +** if no level in the FTS index contains more than ? segments, the statement +** returns zero rows. */ +/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?" + " ORDER BY (level %% 1024) ASC LIMIT 1", - if( argc<2 ){ - sqlite3_result_error(context, "insufficient arguments", -1); - return; - } +/* Estimate the upper limit on the number of leaf nodes in a new segment +** created by merging the oldest :2 segments from absolute level :1. See +** function sqlite3Fts3Incrmerge() for details. */ +/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " + " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", - nName = sqlite3_value_bytes(argv[0]); - zName = (const char *)sqlite3_value_text(argv[0]); - nInput = sqlite3_value_bytes(argv[argc-1]); - zInput = (const char *)sqlite3_value_text(argv[argc-1]); +/* SQL_DELETE_SEGDIR_ENTRY +** Delete the %_segdir entry on absolute level :1 with index :2. */ +/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", - pHash = (Fts3Hash *)sqlite3_user_data(context); - p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); +/* SQL_SHIFT_SEGDIR_ENTRY +** Modify the idx value for the segment with idx=:3 on absolute level :2 +** to :1. */ +/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", - if( !p ){ - char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName); - sqlite3_result_error(context, zErr2, -1); - sqlite3_free(zErr2); - return; - } +/* SQL_SELECT_SEGDIR +** Read a single entry from the %_segdir table. The entry from absolute +** level :1 with index value :2. */ +/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " + "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", - pRet = Tcl_NewObj(); - Tcl_IncrRefCount(pRet); +/* SQL_CHOMP_SEGDIR +** Update the start_block (:1) and root (:2) fields of the %_segdir +** entry located on absolute level :3 with index :4. */ +/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" + "WHERE level = ? AND idx = ?", - for(i=1; i<argc-1; i++){ - azArg[i-1] = (const char *)sqlite3_value_text(argv[i]); - } +/* SQL_SEGMENT_IS_APPENDABLE +** Return a single row if the segment with end_block=? is appendable. Or +** no rows otherwise. */ +/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", - if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){ - zErr = "error in xCreate()"; - goto finish; - } - pTokenizer->pModule = p; - if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){ - zErr = "error in xOpen()"; - goto finish; - } +/* SQL_SELECT_INDEXES +** Return the list of valid segment indexes for absolute level ? */ +/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", - while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){ - Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos)); - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - zToken = &zInput[iStart]; - nToken = iEnd-iStart; - Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken)); - } +/* SQL_SELECT_MXLEVEL +** Return the largest relative level in the FTS index or indexes. */ +/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", - if( SQLITE_OK!=p->xClose(pCsr) ){ - zErr = "error in xClose()"; - goto finish; - } - if( SQLITE_OK!=p->xDestroy(pTokenizer) ){ - zErr = "error in xDestroy()"; - goto finish; - } + /* Return segments in order from oldest to newest.*/ +/* 37 */ "SELECT level, idx, end_block " + "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " + "ORDER BY level DESC, idx ASC", -finish: - if( zErr ){ - sqlite3_result_error(context, zErr, -1); - }else{ - sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT); + /* Update statements used while promoting segments */ +/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " + "WHERE level=? AND idx=?", +/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + + }; + int rc = SQLITE_OK; + sqlite3_stmt *pStmt; + + assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); + assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); + + pStmt = p->aStmt[eStmt]; + if( !pStmt ){ + char *zSql; + if( eStmt==SQL_CONTENT_INSERT ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); + }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ + zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); + }else{ + zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); + } + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); + sqlite3_free(zSql); + assert( rc==SQLITE_OK || pStmt==0 ); + p->aStmt[eStmt] = pStmt; + } + } + if( apVal ){ + int i; + int nParam = sqlite3_bind_parameter_count(pStmt); + for(i=0; rc==SQLITE_OK && i<nParam; i++){ + rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); + } } - Tcl_DecrRefCount(pRet); + *pp = pStmt; + return rc; } -static -int registerTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module *p + +static int fts3SelectDocsize( + Fts3Table *pTab, /* FTS3 table handle */ + sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ ){ - int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?, ?)"; + sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ + int rc; /* Return code */ - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; + rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iDocid); + rc = sqlite3_step(pStmt); + if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ + rc = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; + pStmt = 0; + }else{ + rc = SQLITE_OK; + } } - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC); - sqlite3_step(pStmt); - - return sqlite3_finalize(pStmt); + *ppStmt = pStmt; + return rc; } -static -int queryTokenizer( - sqlite3 *db, - char *zName, - const sqlite3_tokenizer_module **pp +SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ ){ + sqlite3_stmt *pStmt = 0; int rc; - sqlite3_stmt *pStmt; - const char zSql[] = "SELECT fts3_tokenizer(?)"; - - *pp = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ - memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); + rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + if( sqlite3_step(pStmt)!=SQLITE_ROW + || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB + ){ + rc = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; + pStmt = 0; } } - - return sqlite3_finalize(pStmt); + *ppStmt = pStmt; + return rc; } -SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule); +SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( + Fts3Table *pTab, /* Fts3 table handle */ + sqlite3_int64 iDocid, /* Docid to read size data for */ + sqlite3_stmt **ppStmt /* OUT: Statement handle */ +){ + return fts3SelectDocsize(pTab, iDocid, ppStmt); +} /* -** Implementation of the scalar function fts3_tokenizer_internal_test(). -** This function is used for testing only, it is not included in the -** build unless SQLITE_TEST is defined. -** -** The purpose of this is to test that the fts3_tokenizer() function -** can be used as designed by the C-code in the queryTokenizer and -** registerTokenizer() functions above. These two functions are repeated -** in the README.tokenizer file as an example, so it is important to -** test them. -** -** To run the tests, evaluate the fts3_tokenizer_internal_test() scalar -** function with no arguments. An assert() will fail if a problem is -** detected. i.e.: -** -** SELECT fts3_tokenizer_internal_test(); +** Similar to fts3SqlStmt(). Except, after binding the parameters in +** array apVal[] to the SQL statement identified by eStmt, the statement +** is executed. ** +** Returns SQLITE_OK if the statement is successfully executed, or an +** SQLite error code otherwise. */ -static void intTestFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv +static void fts3SqlExec( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS3 table */ + int eStmt, /* Index of statement to evaluate */ + sqlite3_value **apVal /* Parameters to bind */ ){ + sqlite3_stmt *pStmt; int rc; - const sqlite3_tokenizer_module *p1; - const sqlite3_tokenizer_module *p2; - sqlite3 *db = (sqlite3 *)sqlite3_user_data(context); - - UNUSED_PARAMETER(argc); - UNUSED_PARAMETER(argv); - - /* Test the query function */ - sqlite3Fts3SimpleTokenizerModule(&p1); - rc = queryTokenizer(db, "simple", &p2); - assert( rc==SQLITE_OK ); - assert( p1==p2 ); - rc = queryTokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_ERROR ); - assert( p2==0 ); - assert( 0==strcmp(sqlite3_errmsg(db), "unknown tokenizer: nosuchtokenizer") ); - - /* Test the storage function */ - rc = registerTokenizer(db, "nosuchtokenizer", p1); - assert( rc==SQLITE_OK ); - rc = queryTokenizer(db, "nosuchtokenizer", &p2); - assert( rc==SQLITE_OK ); - assert( p2==p1 ); - - sqlite3_result_text(context, "ok", -1, SQLITE_STATIC); + if( *pRC ) return; + rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); + if( rc==SQLITE_OK ){ + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + *pRC = rc; } -#endif /* -** Set up SQL objects in database db used to access the contents of -** the hash table pointed to by argument pHash. The hash table must -** been initialized to use string keys, and to take a private copy -** of the key when a value is inserted. i.e. by a call similar to: -** -** sqlite3Fts3HashInit(pHash, FTS3_HASH_STRING, 1); -** -** This function adds a scalar function (see header comment above -** scalarFunc() in this file for details) and, if ENABLE_TABLE is -** defined at compilation time, a temporary virtual table (see header -** comment above struct HashTableVtab) to the database schema. Both -** provide read/write access to the contents of *pHash. +** This function ensures that the caller has obtained an exclusive +** shared-cache table-lock on the %_segdir table. This is required before +** writing data to the fts3 table. If this lock is not acquired first, then +** the caller may end up attempting to take this lock as part of committing +** a transaction, causing SQLite to return SQLITE_LOCKED or +** LOCKED_SHAREDCACHEto a COMMIT command. ** -** The third argument to this function, zName, is used as the name -** of both the scalar and, if created, the virtual table. +** It is best to avoid this because if FTS3 returns any error when +** committing a transaction, the whole transaction will be rolled back. +** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. +** It can still happen if the user locks the underlying tables directly +** instead of accessing them via FTS. */ -SQLITE_PRIVATE int sqlite3Fts3InitHashTable( - sqlite3 *db, - Fts3Hash *pHash, - const char *zName -){ +static int fts3Writelock(Fts3Table *p){ int rc = SQLITE_OK; - void *p = (void *)pHash; - const int any = SQLITE_ANY; - -#ifdef SQLITE_TEST - char *zTest = 0; - char *zTest2 = 0; - void *pdb = (void *)db; - zTest = sqlite3_mprintf("%s_test", zName); - zTest2 = sqlite3_mprintf("%s_internal_test", zName); - if( !zTest || !zTest2 ){ - rc = SQLITE_NOMEM; - } -#endif - - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zName, 1, any, p, scalarFunc, 0, 0); - } - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zName, 2, any, p, scalarFunc, 0, 0); - } -#ifdef SQLITE_TEST - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest, -1, any, p, testFunc, 0, 0); - } - if( SQLITE_OK==rc ){ - rc = sqlite3_create_function(db, zTest2, 0, any, pdb, intTestFunc, 0, 0); + + if( p->nPendingData==0 ){ + sqlite3_stmt *pStmt; + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_null(pStmt, 1); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } } -#endif - -#ifdef SQLITE_TEST - sqlite3_free(zTest); - sqlite3_free(zTest2); -#endif return rc; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenizer.c **************************************/ -/************** Begin file fts3_tokenizer1.c *********************************/ /* -** 2006 Oct 10 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** FTS maintains a separate indexes for each language-id (a 32-bit integer). +** Within each language id, a separate index is maintained to store the +** document terms, and each configured prefix size (configured the FTS +** "prefix=" option). And each index consists of multiple levels ("relative +** levels"). ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** All three of these values (the language id, the specific index and the +** level within the index) are encoded in 64-bit integer values stored +** in the %_segdir table on disk. This function is used to convert three +** separate component values into the single 64-bit integer value that +** can be used to query the %_segdir table. ** -****************************************************************************** +** Specifically, each language-id/index combination is allocated 1024 +** 64-bit integer level values ("absolute levels"). The main terms index +** for language-id 0 is allocate values 0-1023. The first prefix index +** (if any) for language-id 0 is allocated values 1024-2047. And so on. +** Language 1 indexes are allocated immediately following language 0. ** -** Implementation of the "simple" full-text-search tokenizer. +** So, for a system with nPrefix prefix indexes configured, the block of +** absolute levels that corresponds to language-id iLangid and index +** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). */ +static sqlite3_int64 getAbsoluteLevel( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id */ + int iIndex, /* Index in p->aIndex[] */ + int iLevel /* Level of segments */ +){ + sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ + assert( iLangid>=0 ); + assert( p->nIndex>0 ); + assert( iIndex>=0 && iIndex<p->nIndex ); + + iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; + return iBase + iLevel; +} /* -** The code in this file is only compiled if: +** Set *ppStmt to a statement handle that may be used to iterate through +** all rows in the %_segdir table, from oldest to newest. If successful, +** return SQLITE_OK. If an error occurs while preparing the statement, +** return an SQLite error code. ** -** * The FTS3 module is being built as an extension -** (in which case SQLITE_CORE is not defined), or +** There is only ever one instance of this SQL statement compiled for +** each FTS3 table. ** -** * The FTS3 module is being built into the core of -** SQLite (in which case SQLITE_ENABLE_FTS3 is defined). +** The statement returns the following columns from the %_segdir table: +** +** 0: idx +** 1: start_block +** 2: leaves_end_block +** 3: end_block +** 4: root */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( + Fts3Table *p, /* FTS3 table */ + int iLangid, /* Language being queried */ + int iIndex, /* Index for p->aIndex[] */ + int iLevel, /* Level to select (relative level) */ + sqlite3_stmt **ppStmt /* OUT: Compiled statement */ +){ + int rc; + sqlite3_stmt *pStmt = 0; -/* #include <assert.h> */ -/* #include <stdlib.h> */ -/* #include <stdio.h> */ -/* #include <string.h> */ + assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); + assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); + assert( iIndex>=0 && iIndex<p->nIndex ); + if( iLevel<0 ){ + /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } + }else{ + /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); + } + } + *ppStmt = pStmt; + return rc; +} -typedef struct simple_tokenizer { - sqlite3_tokenizer base; - char delim[128]; /* flag ASCII delimiters */ -} simple_tokenizer; -typedef struct simple_tokenizer_cursor { - sqlite3_tokenizer_cursor base; - const char *pInput; /* input we are tokenizing */ - int nBytes; /* size of the input */ - int iOffset; /* current position in pInput */ - int iToken; /* index of next token to be returned */ - char *pToken; /* storage for current token */ - int nTokenAllocated; /* space allocated to zToken buffer */ -} simple_tokenizer_cursor; +/* +** Append a single varint to a PendingList buffer. SQLITE_OK is returned +** if successful, or an SQLite error code otherwise. +** +** This function also serves to allocate the PendingList structure itself. +** For example, to create a new PendingList structure containing two +** varints: +** +** PendingList *p = 0; +** fts3PendingListAppendVarint(&p, 1); +** fts3PendingListAppendVarint(&p, 2); +*/ +static int fts3PendingListAppendVarint( + PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ + sqlite3_int64 i /* Value to append to data */ +){ + PendingList *p = *pp; + /* Allocate or grow the PendingList as required. */ + if( !p ){ + p = sqlite3_malloc(sizeof(*p) + 100); + if( !p ){ + return SQLITE_NOMEM; + } + p->nSpace = 100; + p->aData = (char *)&p[1]; + p->nData = 0; + } + else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ + int nNew = p->nSpace * 2; + p = sqlite3_realloc(p, sizeof(*p) + nNew); + if( !p ){ + sqlite3_free(*pp); + *pp = 0; + return SQLITE_NOMEM; + } + p->nSpace = nNew; + p->aData = (char *)&p[1]; + } -static int simpleDelim(simple_tokenizer *t, unsigned char c){ - return c<0x80 && t->delim[c]; -} -static int fts3_isalnum(int x){ - return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); + /* Append the new serialized varint to the end of the list. */ + p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); + p->aData[p->nData] = '\0'; + *pp = p; + return SQLITE_OK; } /* -** Create a new tokenizer instance. +** Add a docid/column/position entry to a PendingList structure. Non-zero +** is returned if the structure is sqlite3_realloced as part of adding +** the entry. Otherwise, zero. +** +** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. +** Zero is always returned in this case. Otherwise, if no OOM error occurs, +** it is set to SQLITE_OK. */ -static int simpleCreate( - int argc, const char * const *argv, - sqlite3_tokenizer **ppTokenizer +static int fts3PendingListAppend( + PendingList **pp, /* IN/OUT: PendingList structure */ + sqlite3_int64 iDocid, /* Docid for entry to add */ + sqlite3_int64 iCol, /* Column for entry to add */ + sqlite3_int64 iPos, /* Position of term for entry to add */ + int *pRc /* OUT: Return code */ ){ - simple_tokenizer *t; + PendingList *p = *pp; + int rc = SQLITE_OK; - t = (simple_tokenizer *) sqlite3_malloc(sizeof(*t)); - if( t==NULL ) return SQLITE_NOMEM; - memset(t, 0, sizeof(*t)); + assert( !p || p->iLastDocid<=iDocid ); - /* TODO(shess) Delimiters need to remain the same from run to run, - ** else we need to reindex. One solution would be a meta-table to - ** track such information in the database, then we'd only want this - ** information on the initial create. - */ - if( argc>1 ){ - int i, n = (int)strlen(argv[1]); - for(i=0; i<n; i++){ - unsigned char ch = argv[1][i]; - /* We explicitly don't support UTF-8 delimiters for now. */ - if( ch>=0x80 ){ - sqlite3_free(t); - return SQLITE_ERROR; - } - t->delim[ch] = 1; + if( !p || p->iLastDocid!=iDocid ){ + sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); + if( p ){ + assert( p->nData<p->nSpace ); + assert( p->aData[p->nData]==0 ); + p->nData++; } - } else { - /* Mark non-alphanumeric ASCII characters as delimiters */ - int i; - for(i=1; i<0x80; i++){ - t->delim[i] = !fts3_isalnum(i) ? -1 : 0; + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ + goto pendinglistappend_out; + } + p->iLastCol = -1; + p->iLastPos = 0; + p->iLastDocid = iDocid; + } + if( iCol>0 && p->iLastCol!=iCol ){ + if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) + || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) + ){ + goto pendinglistappend_out; + } + p->iLastCol = iCol; + p->iLastPos = 0; + } + if( iCol>=0 ){ + assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); + rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); + if( rc==SQLITE_OK ){ + p->iLastPos = iPos; } } - *ppTokenizer = &t->base; - return SQLITE_OK; + pendinglistappend_out: + *pRc = rc; + if( p!=*pp ){ + *pp = p; + return 1; + } + return 0; } /* -** Destroy a tokenizer +** Free a PendingList object allocated by fts3PendingListAppend(). */ -static int simpleDestroy(sqlite3_tokenizer *pTokenizer){ - sqlite3_free(pTokenizer); - return SQLITE_OK; +static void fts3PendingListDelete(PendingList *pList){ + sqlite3_free(pList); } /* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. +** Add an entry to one of the pending-terms hash tables. */ -static int simpleOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *pInput, int nBytes, /* String to be tokenized */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +static int fts3PendingTermsAddOne( + Fts3Table *p, + int iCol, + int iPos, + Fts3Hash *pHash, /* Pending terms hash table to add entry to */ + const char *zToken, + int nToken ){ - simple_tokenizer_cursor *c; - - UNUSED_PARAMETER(pTokenizer); - - c = (simple_tokenizer_cursor *) sqlite3_malloc(sizeof(*c)); - if( c==NULL ) return SQLITE_NOMEM; + PendingList *pList; + int rc = SQLITE_OK; - c->pInput = pInput; - if( pInput==0 ){ - c->nBytes = 0; - }else if( nBytes<0 ){ - c->nBytes = (int)strlen(pInput); - }else{ - c->nBytes = nBytes; + pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); + if( pList ){ + p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); } - c->iOffset = 0; /* start tokenizing at the beginning */ - c->iToken = 0; - c->pToken = NULL; /* no space allocated, yet. */ - c->nTokenAllocated = 0; - - *ppCursor = &c->base; - return SQLITE_OK; -} - -/* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. -*/ -static int simpleClose(sqlite3_tokenizer_cursor *pCursor){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - sqlite3_free(c->pToken); - sqlite3_free(c); - return SQLITE_OK; + if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ + if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ + /* Malloc failed while inserting the new entry. This can only + ** happen if there was no previous entry for this token. + */ + assert( 0==fts3HashFind(pHash, zToken, nToken) ); + sqlite3_free(pList); + rc = SQLITE_NOMEM; + } + } + if( rc==SQLITE_OK ){ + p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + } + return rc; } /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). +** Tokenize the nul-terminated string zText and add all tokens to the +** pending-terms hash-table. The docid used is that currently stored in +** p->iPrevDocid, and the column is specified by argument iCol. +** +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ -static int simpleNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ +static int fts3PendingTermsAdd( + Fts3Table *p, /* Table into which text will be inserted */ + int iLangid, /* Language id to use */ + const char *zText, /* Text of document to be inserted */ + int iCol, /* Column into which text is being inserted */ + u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ ){ - simple_tokenizer_cursor *c = (simple_tokenizer_cursor *) pCursor; - simple_tokenizer *t = (simple_tokenizer *) pCursor->pTokenizer; - unsigned char *p = (unsigned char *)c->pInput; + int rc; + int iStart = 0; + int iEnd = 0; + int iPos = 0; + int nWord = 0; - while( c->iOffset<c->nBytes ){ - int iStartOffset; + char const *zToken; + int nToken = 0; - /* Scan past delimiter characters */ - while( c->iOffset<c->nBytes && simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; + sqlite3_tokenizer *pTokenizer = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; + sqlite3_tokenizer_cursor *pCsr; + int (*xNext)(sqlite3_tokenizer_cursor *pCursor, + const char**,int*,int*,int*,int*); + + assert( pTokenizer && pModule ); + + /* If the user has inserted a NULL value, this function may be called with + ** zText==0. In this case, add zero token entries to the hash table and + ** return early. */ + if( zText==0 ){ + *pnWord = 0; + return SQLITE_OK; + } + + rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); + if( rc!=SQLITE_OK ){ + return rc; + } + + xNext = pModule->xNext; + while( SQLITE_OK==rc + && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) + ){ + int i; + if( iPos>=nWord ) nWord = iPos+1; + + /* Positions cannot be negative; we use -1 as a terminator internally. + ** Tokens must have a non-zero length. + */ + if( iPos<0 || !zToken || nToken<=0 ){ + rc = SQLITE_ERROR; + break; } - /* Count non-delimiter characters. */ - iStartOffset = c->iOffset; - while( c->iOffset<c->nBytes && !simpleDelim(t, p[c->iOffset]) ){ - c->iOffset++; + /* Add the term to the terms index */ + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken + ); + + /* Add the term to each of the prefix indexes that it is not too + ** short for. */ + for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){ + struct Fts3Index *pIndex = &p->aIndex[i]; + if( nToken<pIndex->nPrefix ) continue; + rc = fts3PendingTermsAddOne( + p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix + ); } + } - if( c->iOffset>iStartOffset ){ - int i, n = c->iOffset-iStartOffset; - if( n>c->nTokenAllocated ){ - char *pNew; - c->nTokenAllocated = n+20; - pNew = sqlite3_realloc(c->pToken, c->nTokenAllocated); - if( !pNew ) return SQLITE_NOMEM; - c->pToken = pNew; - } - for(i=0; i<n; i++){ - /* TODO(shess) This needs expansion to handle UTF-8 - ** case-insensitivity. - */ - unsigned char ch = p[iStartOffset+i]; - c->pToken[i] = (char)((ch>='A' && ch<='Z') ? ch-'A'+'a' : ch); - } - *ppToken = c->pToken; - *pnBytes = n; - *piStartOffset = iStartOffset; - *piEndOffset = c->iOffset; - *piPosition = c->iToken++; + pModule->xClose(pCsr); + *pnWord += nWord; + return (rc==SQLITE_DONE ? SQLITE_OK : rc); +} - return SQLITE_OK; - } +/* +** Calling this function indicates that subsequent calls to +** fts3PendingTermsAdd() are to add term/position-list pairs for the +** contents of the document with docid iDocid. +*/ +static int fts3PendingTermsDocid( + Fts3Table *p, /* Full-text table handle */ + int iLangid, /* Language id of row being written */ + sqlite_int64 iDocid /* Docid of row being written */ +){ + assert( iLangid>=0 ); + + /* TODO(shess) Explore whether partially flushing the buffer on + ** forced-flush would provide better performance. I suspect that if + ** we ordered the doclists by size and flushed the largest until the + ** buffer was half empty, that would let the less frequent terms + ** generate longer doclists. + */ + if( iDocid<=p->iPrevDocid + || p->iPrevLangid!=iLangid + || p->nPendingData>p->nMaxPendingData + ){ + int rc = sqlite3Fts3PendingTermsFlush(p); + if( rc!=SQLITE_OK ) return rc; } - return SQLITE_DONE; + p->iPrevDocid = iDocid; + p->iPrevLangid = iLangid; + return SQLITE_OK; } /* -** The set of routines that implement the simple tokenizer +** Discard the contents of the pending-terms hash tables. */ -static const sqlite3_tokenizer_module simpleTokenizerModule = { - 0, - simpleCreate, - simpleDestroy, - simpleOpen, - simpleClose, - simpleNext, - 0, -}; +SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ + int i; + for(i=0; i<p->nIndex; i++){ + Fts3HashElem *pElem; + Fts3Hash *pHash = &p->aIndex[i].hPending; + for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ + PendingList *pList = (PendingList *)fts3HashData(pElem); + fts3PendingListDelete(pList); + } + fts3HashClear(pHash); + } + p->nPendingData = 0; +} /* -** Allocate a new simple tokenizer. Return a pointer to the new -** tokenizer in *ppModule +** This function is called by the xUpdate() method as part of an INSERT +** operation. It adds entries for each term in the new record to the +** pendingTerms hash table. +** +** Argument apVal is the same as the similarly named argument passed to +** fts3InsertData(). Parameter iDocid is the docid of the new row. */ -SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule( - sqlite3_tokenizer_module const**ppModule +static int fts3InsertTerms( + Fts3Table *p, + int iLangid, + sqlite3_value **apVal, + u32 *aSz ){ - *ppModule = &simpleTokenizerModule; + int i; /* Iterator variable */ + for(i=2; i<p->nColumn+2; i++){ + int iCol = i-2; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_value_text(apVal[i]); + int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); + if( rc!=SQLITE_OK ){ + return rc; + } + aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); + } + } + return SQLITE_OK; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenizer1.c *************************************/ -/************** Begin file fts3_tokenize_vtab.c ******************************/ /* -** 2013 Apr 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -****************************************************************************** -** -** This file contains code for the "fts3tokenize" virtual table module. -** An fts3tokenize virtual table is created as follows: -** -** CREATE VIRTUAL TABLE <tbl> USING fts3tokenize( -** <tokenizer-name>, <arg-1>, ... -** ); -** -** The table created has the following schema: -** -** CREATE TABLE <tbl>(input, token, start, end, position) -** -** When queried, the query must include a WHERE clause of type: -** -** input = <string> -** -** The virtual table module tokenizes this <string>, using the FTS3 -** tokenizer specified by the arguments to the CREATE VIRTUAL TABLE -** statement and returns one row for each token in the result. With -** fields set as follows: -** -** input: Always set to a copy of <string> -** token: A token from the input. -** start: Byte offset of the token within the input <string>. -** end: Byte offset of the byte immediately following the end of the -** token within the input string. -** pos: Token offset of token within input. +** This function is called by the xUpdate() method for an INSERT operation. +** The apVal parameter is passed a copy of the apVal argument passed by +** SQLite to the xUpdate() method. i.e: ** +** apVal[0] Not used for INSERT. +** apVal[1] rowid +** apVal[2] Left-most user-defined column +** ... +** apVal[p->nColumn+1] Right-most user-defined column +** apVal[p->nColumn+2] Hidden column with same name as table +** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) +** apVal[p->nColumn+4] Hidden languageid column */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +static int fts3InsertData( + Fts3Table *p, /* Full-text table */ + sqlite3_value **apVal, /* Array of values to insert */ + sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ +){ + int rc; /* Return code */ + sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ + + if( p->zContentTbl ){ + sqlite3_value *pRowid = apVal[p->nColumn+3]; + if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ + pRowid = apVal[1]; + } + if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ + return SQLITE_CONSTRAINT; + } + *piDocid = sqlite3_value_int64(pRowid); + return SQLITE_OK; + } + + /* Locate the statement handle used to insert data into the %_content + ** table. The SQL for this statement is: + ** + ** INSERT INTO %_content VALUES(?, ?, ?, ...) + ** + ** The statement features N '?' variables, where N is the number of user + ** defined columns in the FTS3 table, plus one for the docid field. + */ + rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); + if( rc==SQLITE_OK && p->zLanguageid ){ + rc = sqlite3_bind_int( + pContentInsert, p->nColumn+2, + sqlite3_value_int(apVal[p->nColumn+4]) + ); + } + if( rc!=SQLITE_OK ) return rc; + + /* There is a quirk here. The users INSERT statement may have specified + ** a value for the "rowid" field, for the "docid" field, or for both. + ** Which is a problem, since "rowid" and "docid" are aliases for the + ** same value. For example: + ** + ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); + ** + ** In FTS3, this is an error. It is an error to specify non-NULL values + ** for both docid and some other rowid alias. + */ + if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ + if( SQLITE_NULL==sqlite3_value_type(apVal[0]) + && SQLITE_NULL!=sqlite3_value_type(apVal[1]) + ){ + /* A rowid/docid conflict. */ + return SQLITE_ERROR; + } + rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); + if( rc!=SQLITE_OK ) return rc; + } + + /* Execute the statement to insert the record. Set *piDocid to the + ** new docid value. + */ + sqlite3_step(pContentInsert); + rc = sqlite3_reset(pContentInsert); + + *piDocid = sqlite3_last_insert_rowid(p->db); + return rc; +} -/* #include <string.h> */ -/* #include <assert.h> */ -typedef struct Fts3tokTable Fts3tokTable; -typedef struct Fts3tokCursor Fts3tokCursor; /* -** Virtual table structure. +** Remove all data from the FTS3 table. Clear the hash table containing +** pending terms. */ -struct Fts3tokTable { - sqlite3_vtab base; /* Base class used by SQLite core */ - const sqlite3_tokenizer_module *pMod; - sqlite3_tokenizer *pTok; -}; +static int fts3DeleteAll(Fts3Table *p, int bContent){ + int rc = SQLITE_OK; /* Return code */ + + /* Discard the contents of the pending-terms hash table. */ + sqlite3Fts3PendingTermsClear(p); + + /* Delete everything from the shadow tables. Except, leave %_content as + ** is if bContent is false. */ + assert( p->zContentTbl==0 || bContent==0 ); + if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); + fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); + } + if( p->bHasStat ){ + fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); + } + return rc; +} /* -** Virtual table cursor structure. +** */ -struct Fts3tokCursor { - sqlite3_vtab_cursor base; /* Base class used by SQLite core */ - char *zInput; /* Input string */ - sqlite3_tokenizer_cursor *pCsr; /* Cursor to iterate through zInput */ - int iRowid; /* Current 'rowid' value */ - const char *zToken; /* Current 'token' value */ - int nToken; /* Size of zToken in bytes */ - int iStart; /* Current 'start' value */ - int iEnd; /* Current 'end' value */ - int iPos; /* Current 'pos' value */ -}; +static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ + int iLangid = 0; + if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); + return iLangid; +} /* -** Query FTS for the tokenizer implementation named zName. +** The first element in the apVal[] array is assumed to contain the docid +** (an integer) of a row about to be deleted. Remove all terms from the +** full-text index. */ -static int fts3tokQueryTokenizer( - Fts3Hash *pHash, - const char *zName, - const sqlite3_tokenizer_module **pp, - char **pzErr +static void fts3DeleteTerms( + int *pRC, /* Result code */ + Fts3Table *p, /* The FTS table to delete from */ + sqlite3_value *pRowid, /* The docid to be deleted */ + u32 *aSz, /* Sizes of deleted document written here */ + int *pbFound /* OUT: Set to true if row really does exist */ ){ - sqlite3_tokenizer_module *p; - int nName = (int)strlen(zName); + int rc; + sqlite3_stmt *pSelect; - p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1); - if( !p ){ - sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName); - return SQLITE_ERROR; + assert( *pbFound==0 ); + if( *pRC ) return; + rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + int i; + int iLangid = langidFromSelect(p, pSelect); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); + for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ + int iCol = i-1; + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pSelect, i); + rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); + } + } + if( rc!=SQLITE_OK ){ + sqlite3_reset(pSelect); + *pRC = rc; + return; + } + *pbFound = 1; + } + rc = sqlite3_reset(pSelect); + }else{ + sqlite3_reset(pSelect); } - - *pp = p; - return SQLITE_OK; + *pRC = rc; } /* -** The second argument, argv[], is an array of pointers to nul-terminated -** strings. This function makes a copy of the array and strings into a -** single block of memory. It then dequotes any of the strings that appear -** to be quoted. +** Forward declaration to account for the circular dependency between +** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). +*/ +static int fts3SegmentMerge(Fts3Table *, int, int, int); + +/* +** This function allocates a new level iLevel index in the segdir table. +** Usually, indexes are allocated within a level sequentially starting +** with 0, so the allocated index is one greater than the value returned +** by: ** -** If successful, output parameter *pazDequote is set to point at the -** array of dequoted strings and SQLITE_OK is returned. The caller is -** responsible for eventually calling sqlite3_free() to free the array -** in this case. Or, if an error occurs, an SQLite error code is returned. -** The final value of *pazDequote is undefined in this case. +** SELECT max(idx) FROM %_segdir WHERE level = :iLevel +** +** However, if there are already FTS3_MERGE_COUNT indexes at the requested +** level, they are merged into a single level (iLevel+1) segment and the +** allocated index is 0. +** +** If successful, *piIdx is set to the allocated index slot and SQLITE_OK +** returned. Otherwise, an SQLite error code is returned. */ -static int fts3tokDequoteArray( - int argc, /* Number of elements in argv[] */ - const char * const *argv, /* Input array */ - char ***pazDequote /* Output array */ +static int fts3AllocateSegdirIdx( + Fts3Table *p, + int iLangid, /* Language id */ + int iIndex, /* Index for p->aIndex */ + int iLevel, + int *piIdx ){ - int rc = SQLITE_OK; /* Return code */ - if( argc==0 ){ - *pazDequote = 0; - }else{ - int i; - int nByte = 0; - char **azDequote; + int rc; /* Return Code */ + sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ + int iNext = 0; /* Result of query pNextIdx */ - for(i=0; i<argc; i++){ - nByte += (int)(strlen(argv[i]) + 1); + assert( iLangid>=0 ); + assert( p->nIndex>=1 ); + + /* Set variable iNext to the next available segdir index at level iLevel. */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ + iNext = sqlite3_column_int(pNextIdx, 0); } + rc = sqlite3_reset(pNextIdx); + } - *pazDequote = azDequote = sqlite3_malloc(sizeof(char *)*argc + nByte); - if( azDequote==0 ){ - rc = SQLITE_NOMEM; + if( rc==SQLITE_OK ){ + /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already + ** full, merge all segments in level iLevel into a single iLevel+1 + ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, + ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. + */ + if( iNext>=FTS3_MERGE_COUNT ){ + fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); + rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); + *piIdx = 0; }else{ - char *pSpace = (char *)&azDequote[argc]; - for(i=0; i<argc; i++){ - int n = (int)strlen(argv[i]); - azDequote[i] = pSpace; - memcpy(pSpace, argv[i], n+1); - sqlite3Fts3Dequote(pSpace); - pSpace += (n+1); - } + *piIdx = iNext; } } @@ -142643,5988 +145611,6988 @@ static int fts3tokDequoteArray( } /* -** Schema of the tokenizer table. -*/ -#define FTS3_TOK_SCHEMA "CREATE TABLE x(input, token, start, end, position)" - -/* -** This function does all the work for both the xConnect and xCreate methods. -** These tables have no persistent representation of their own, so xConnect -** and xCreate are identical operations. +** The %_segments table is declared as follows: ** -** argv[0]: module name -** argv[1]: database name -** argv[2]: table name -** argv[3]: first argument (tokenizer name) +** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) +** +** This function reads data from a single row of the %_segments table. The +** specific row is identified by the iBlockid parameter. If paBlob is not +** NULL, then a buffer is allocated using sqlite3_malloc() and populated +** with the contents of the blob stored in the "block" column of the +** identified table row is. Whether or not paBlob is NULL, *pnBlob is set +** to the size of the blob in bytes before returning. +** +** If an error occurs, or the table does not contain the specified row, +** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If +** paBlob is non-NULL, then it is the responsibility of the caller to +** eventually free the returned buffer. +** +** This function may leave an open sqlite3_blob* handle in the +** Fts3Table.pSegments variable. This handle is reused by subsequent calls +** to this function. The handle may be closed by calling the +** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy +** performance improvement, but the blob handle should always be closed +** before control is returned to the user (to prevent a lock being held +** on the database file for longer than necessary). Thus, any virtual table +** method (xFilter etc.) that may directly or indirectly call this function +** must call sqlite3Fts3SegmentsClose() before returning. */ -static int fts3tokConnectMethod( - sqlite3 *db, /* Database connection */ - void *pHash, /* Hash table of tokenizers */ - int argc, /* Number of elements in argv array */ - const char * const *argv, /* xCreate/xConnect argument array */ - sqlite3_vtab **ppVtab, /* OUT: New sqlite3_vtab object */ - char **pzErr /* OUT: sqlite3_malloc'd error message */ +SQLITE_PRIVATE int sqlite3Fts3ReadBlock( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ + char **paBlob, /* OUT: Blob data in malloc'd buffer */ + int *pnBlob, /* OUT: Size of blob data */ + int *pnLoad /* OUT: Bytes actually loaded */ ){ - Fts3tokTable *pTab = 0; - const sqlite3_tokenizer_module *pMod = 0; - sqlite3_tokenizer *pTok = 0; - int rc; - char **azDequote = 0; - int nDequote; - - rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); - if( rc!=SQLITE_OK ) return rc; + int rc; /* Return code */ - nDequote = argc-3; - rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote); + /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ + assert( pnBlob ); - if( rc==SQLITE_OK ){ - const char *zModule; - if( nDequote<1 ){ - zModule = "simple"; - }else{ - zModule = azDequote[0]; + if( p->pSegments ){ + rc = sqlite3_blob_reopen(p->pSegments, iBlockid); + }else{ + if( 0==p->zSegmentsTbl ){ + p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); + if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; } - rc = fts3tokQueryTokenizer((Fts3Hash*)pHash, zModule, &pMod, pzErr); - } - - assert( (rc==SQLITE_OK)==(pMod!=0) ); - if( rc==SQLITE_OK ){ - const char * const *azArg = (const char * const *)&azDequote[1]; - rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); + rc = sqlite3_blob_open( + p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments + ); } if( rc==SQLITE_OK ){ - pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable)); - if( pTab==0 ){ - rc = SQLITE_NOMEM; + int nByte = sqlite3_blob_bytes(p->pSegments); + *pnBlob = nByte; + if( paBlob ){ + char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); + if( !aByte ){ + rc = SQLITE_NOMEM; + }else{ + if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ + nByte = FTS3_NODE_CHUNKSIZE; + *pnLoad = nByte; + } + rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); + memset(&aByte[nByte], 0, FTS3_NODE_PADDING); + if( rc!=SQLITE_OK ){ + sqlite3_free(aByte); + aByte = 0; + } + } + *paBlob = aByte; } } + return rc; +} + +/* +** Close the blob handle at p->pSegments, if it is open. See comments above +** the sqlite3Fts3ReadBlock() function for details. +*/ +SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ + sqlite3_blob_close(p->pSegments); + p->pSegments = 0; +} + +static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ + int nRead; /* Number of bytes to read */ + int rc; /* Return code */ + + nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); + rc = sqlite3_blob_read( + pReader->pBlob, + &pReader->aNode[pReader->nPopulate], + nRead, + pReader->nPopulate + ); + if( rc==SQLITE_OK ){ - memset(pTab, 0, sizeof(Fts3tokTable)); - pTab->pMod = pMod; - pTab->pTok = pTok; - *ppVtab = &pTab->base; - }else{ - if( pTok ){ - pMod->xDestroy(pTok); + pReader->nPopulate += nRead; + memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); + if( pReader->nPopulate==pReader->nNode ){ + sqlite3_blob_close(pReader->pBlob); + pReader->pBlob = 0; + pReader->nPopulate = 0; } } + return rc; +} - sqlite3_free(azDequote); +static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ + int rc = SQLITE_OK; + assert( !pReader->pBlob + || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) + ); + while( pReader->pBlob && rc==SQLITE_OK + && (pFrom - pReader->aNode + nByte)>pReader->nPopulate + ){ + rc = fts3SegReaderIncrRead(pReader); + } return rc; } /* -** This function does the work for both the xDisconnect and xDestroy methods. -** These tables have no persistent representation of their own, so xDisconnect -** and xDestroy are identical operations. +** Set an Fts3SegReader cursor to point at EOF. */ -static int fts3tokDisconnectMethod(sqlite3_vtab *pVtab){ - Fts3tokTable *pTab = (Fts3tokTable *)pVtab; - - pTab->pMod->xDestroy(pTab->pTok); - sqlite3_free(pTab); - return SQLITE_OK; +static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ + if( !fts3SegReaderIsRootOnly(pSeg) ){ + sqlite3_free(pSeg->aNode); + sqlite3_blob_close(pSeg->pBlob); + pSeg->pBlob = 0; + } + pSeg->aNode = 0; } /* -** xBestIndex - Analyze a WHERE and ORDER BY clause. +** Move the iterator passed as the first argument to the next term in the +** segment. If successful, SQLITE_OK is returned. If there is no next term, +** SQLITE_DONE. Otherwise, an SQLite error code. */ -static int fts3tokBestIndexMethod( - sqlite3_vtab *pVTab, - sqlite3_index_info *pInfo +static int fts3SegReaderNext( + Fts3Table *p, + Fts3SegReader *pReader, + int bIncr ){ - int i; - UNUSED_PARAMETER(pVTab); + int rc; /* Return code of various sub-routines */ + char *pNext; /* Cursor variable */ + int nPrefix; /* Number of bytes in term prefix */ + int nSuffix; /* Number of bytes in term suffix */ - for(i=0; i<pInfo->nConstraint; i++){ - if( pInfo->aConstraint[i].usable - && pInfo->aConstraint[i].iColumn==0 - && pInfo->aConstraint[i].op==SQLITE_INDEX_CONSTRAINT_EQ - ){ - pInfo->idxNum = 1; - pInfo->aConstraintUsage[i].argvIndex = 1; - pInfo->aConstraintUsage[i].omit = 1; - pInfo->estimatedCost = 1; + if( !pReader->aDoclist ){ + pNext = pReader->aNode; + }else{ + pNext = &pReader->aDoclist[pReader->nDoclist]; + } + + if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ + + if( fts3SegReaderIsPending(pReader) ){ + Fts3HashElem *pElem = *(pReader->ppNextElem); + if( pElem==0 ){ + pReader->aNode = 0; + }else{ + PendingList *pList = (PendingList *)fts3HashData(pElem); + pReader->zTerm = (char *)fts3HashKey(pElem); + pReader->nTerm = fts3HashKeysize(pElem); + pReader->nNode = pReader->nDoclist = pList->nData + 1; + pReader->aNode = pReader->aDoclist = pList->aData; + pReader->ppNextElem++; + assert( pReader->aNode ); + } return SQLITE_OK; } - } - pInfo->idxNum = 0; - assert( pInfo->estimatedCost>1000000.0 ); + fts3SegReaderSetEof(pReader); - return SQLITE_OK; -} + /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf + ** blocks have already been traversed. */ + assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); + if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ + return SQLITE_OK; + } -/* -** xOpen - Open a cursor. -*/ -static int fts3tokOpenMethod(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCsr){ - Fts3tokCursor *pCsr; - UNUSED_PARAMETER(pVTab); + rc = sqlite3Fts3ReadBlock( + p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, + (bIncr ? &pReader->nPopulate : 0) + ); + if( rc!=SQLITE_OK ) return rc; + assert( pReader->pBlob==0 ); + if( bIncr && pReader->nPopulate<pReader->nNode ){ + pReader->pBlob = p->pSegments; + p->pSegments = 0; + } + pNext = pReader->aNode; + } - pCsr = (Fts3tokCursor *)sqlite3_malloc(sizeof(Fts3tokCursor)); - if( pCsr==0 ){ - return SQLITE_NOMEM; + assert( !fts3SegReaderIsPending(pReader) ); + + rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); + if( rc!=SQLITE_OK ) return rc; + + /* Because of the FTS3_NODE_PADDING bytes of padding, the following is + ** safe (no risk of overread) even if the node data is corrupted. */ + pNext += fts3GetVarint32(pNext, &nPrefix); + pNext += fts3GetVarint32(pNext, &nSuffix); + if( nPrefix<0 || nSuffix<=0 + || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] + ){ + return FTS_CORRUPT_VTAB; } - memset(pCsr, 0, sizeof(Fts3tokCursor)); - *ppCsr = (sqlite3_vtab_cursor *)pCsr; + if( nPrefix+nSuffix>pReader->nTermAlloc ){ + int nNew = (nPrefix+nSuffix)*2; + char *zNew = sqlite3_realloc(pReader->zTerm, nNew); + if( !zNew ){ + return SQLITE_NOMEM; + } + pReader->zTerm = zNew; + pReader->nTermAlloc = nNew; + } + + rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); + if( rc!=SQLITE_OK ) return rc; + + memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); + pReader->nTerm = nPrefix+nSuffix; + pNext += nSuffix; + pNext += fts3GetVarint32(pNext, &pReader->nDoclist); + pReader->aDoclist = pNext; + pReader->pOffsetList = 0; + + /* Check that the doclist does not appear to extend past the end of the + ** b-tree node. And that the final byte of the doclist is 0x00. If either + ** of these statements is untrue, then the data structure is corrupt. + */ + if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] + || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) + ){ + return FTS_CORRUPT_VTAB; + } return SQLITE_OK; } /* -** Reset the tokenizer cursor passed as the only argument. As if it had -** just been returned by fts3tokOpenMethod(). +** Set the SegReader to point to the first docid in the doclist associated +** with the current term. */ -static void fts3tokResetCursor(Fts3tokCursor *pCsr){ - if( pCsr->pCsr ){ - Fts3tokTable *pTab = (Fts3tokTable *)(pCsr->base.pVtab); - pTab->pMod->xClose(pCsr->pCsr); - pCsr->pCsr = 0; +static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ + int rc = SQLITE_OK; + assert( pReader->aDoclist ); + assert( !pReader->pOffsetList ); + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + u8 bEof = 0; + pReader->iDocid = 0; + pReader->nOffsetList = 0; + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, + &pReader->iDocid, &pReader->nOffsetList, &bEof + ); + }else{ + rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); + pReader->pOffsetList = &pReader->aDoclist[n]; + } } - sqlite3_free(pCsr->zInput); - pCsr->zInput = 0; - pCsr->zToken = 0; - pCsr->nToken = 0; - pCsr->iStart = 0; - pCsr->iEnd = 0; - pCsr->iPos = 0; - pCsr->iRowid = 0; + return rc; } /* -** xClose - Close a cursor. +** Advance the SegReader to point to the next docid in the doclist +** associated with the current term. +** +** If arguments ppOffsetList and pnOffsetList are not NULL, then +** *ppOffsetList is set to point to the first column-offset list +** in the doclist entry (i.e. immediately past the docid varint). +** *pnOffsetList is set to the length of the set of column-offset +** lists, not including the nul-terminator byte. For example: */ -static int fts3tokCloseMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; +static int fts3SegReaderNextDocid( + Fts3Table *pTab, + Fts3SegReader *pReader, /* Reader to advance to next docid */ + char **ppOffsetList, /* OUT: Pointer to current position-list */ + int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ +){ + int rc = SQLITE_OK; + char *p = pReader->pOffsetList; + char c = 0; - fts3tokResetCursor(pCsr); - sqlite3_free(pCsr); - return SQLITE_OK; -} + assert( p ); -/* -** xNext - Advance the cursor to the next row, if any. -*/ -static int fts3tokNextMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); - int rc; /* Return code */ + if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ + /* A pending-terms seg-reader for an FTS4 table that uses order=desc. + ** Pending-terms doclists are always built up in ascending order, so + ** we have to iterate through them backwards here. */ + u8 bEof = 0; + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = pReader->nOffsetList - 1; + } + sqlite3Fts3DoclistPrev(0, + pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, + &pReader->nOffsetList, &bEof + ); + if( bEof ){ + pReader->pOffsetList = 0; + }else{ + pReader->pOffsetList = p; + } + }else{ + char *pEnd = &pReader->aDoclist[pReader->nDoclist]; - pCsr->iRowid++; - rc = pTab->pMod->xNext(pCsr->pCsr, - &pCsr->zToken, &pCsr->nToken, - &pCsr->iStart, &pCsr->iEnd, &pCsr->iPos - ); + /* Pointer p currently points at the first byte of an offset list. The + ** following block advances it to point one byte past the end of + ** the same offset list. */ + while( 1 ){ + + /* The following line of code (and the "p++" below the while() loop) is + ** normally all that is required to move pointer p to the desired + ** position. The exception is if this node is being loaded from disk + ** incrementally and pointer "p" now points to the first byte past + ** the populated part of pReader->aNode[]. + */ + while( *p | c ) c = *p++ & 0x80; + assert( *p==0 ); + + if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; + rc = fts3SegReaderIncrRead(pReader); + if( rc!=SQLITE_OK ) return rc; + } + p++; + + /* If required, populate the output variables with a pointer to and the + ** size of the previous offset-list. + */ + if( ppOffsetList ){ + *ppOffsetList = pReader->pOffsetList; + *pnOffsetList = (int)(p - pReader->pOffsetList - 1); + } - if( rc!=SQLITE_OK ){ - fts3tokResetCursor(pCsr); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; + /* List may have been edited in place by fts3EvalNearTrim() */ + while( p<pEnd && *p==0 ) p++; + + /* If there are no more entries in the doclist, set pOffsetList to + ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and + ** Fts3SegReader.pOffsetList to point to the next offset list before + ** returning. + */ + if( p>=pEnd ){ + pReader->pOffsetList = 0; + }else{ + rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); + if( rc==SQLITE_OK ){ + sqlite3_int64 iDelta; + pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); + if( pTab->bDescIdx ){ + pReader->iDocid -= iDelta; + }else{ + pReader->iDocid += iDelta; + } + } + } } - return rc; + return SQLITE_OK; } -/* -** xFilter - Initialize a cursor to point at the start of its data. -*/ -static int fts3tokFilterMethod( - sqlite3_vtab_cursor *pCursor, /* The cursor used for this query */ - int idxNum, /* Strategy index */ - const char *idxStr, /* Unused */ - int nVal, /* Number of elements in apVal */ - sqlite3_value **apVal /* Arguments for the indexing scheme */ + +SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( + Fts3Cursor *pCsr, + Fts3MultiSegReader *pMsr, + int *pnOvfl ){ - int rc = SQLITE_ERROR; - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - Fts3tokTable *pTab = (Fts3tokTable *)(pCursor->pVtab); - UNUSED_PARAMETER(idxStr); - UNUSED_PARAMETER(nVal); + Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; + int nOvfl = 0; + int ii; + int rc = SQLITE_OK; + int pgsz = p->nPgsz; - fts3tokResetCursor(pCsr); - if( idxNum==1 ){ - const char *zByte = (const char *)sqlite3_value_text(apVal[0]); - int nByte = sqlite3_value_bytes(apVal[0]); - pCsr->zInput = sqlite3_malloc(nByte+1); - if( pCsr->zInput==0 ){ - rc = SQLITE_NOMEM; - }else{ - memcpy(pCsr->zInput, zByte, nByte); - pCsr->zInput[nByte] = 0; - rc = pTab->pMod->xOpen(pTab->pTok, pCsr->zInput, nByte, &pCsr->pCsr); - if( rc==SQLITE_OK ){ - pCsr->pCsr->pTokenizer = pTab->pTok; + assert( p->bFts4 ); + assert( pgsz>0 ); + + for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){ + Fts3SegReader *pReader = pMsr->apSegment[ii]; + if( !fts3SegReaderIsPending(pReader) + && !fts3SegReaderIsRootOnly(pReader) + ){ + sqlite3_int64 jj; + for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ + int nBlob; + rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); + if( rc!=SQLITE_OK ) break; + if( (nBlob+35)>pgsz ){ + nOvfl += (nBlob + 34)/pgsz; + } } } } - - if( rc!=SQLITE_OK ) return rc; - return fts3tokNextMethod(pCursor); + *pnOvfl = nOvfl; + return rc; } /* -** xEof - Return true if the cursor is at EOF, or false otherwise. +** Free all allocations associated with the iterator passed as the +** second argument. */ -static int fts3tokEofMethod(sqlite3_vtab_cursor *pCursor){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - return (pCsr->zToken==0); +SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ + if( pReader && !fts3SegReaderIsPending(pReader) ){ + sqlite3_free(pReader->zTerm); + if( !fts3SegReaderIsRootOnly(pReader) ){ + sqlite3_free(pReader->aNode); + sqlite3_blob_close(pReader->pBlob); + } + } + sqlite3_free(pReader); } /* -** xColumn - Return a column value. +** Allocate a new SegReader object. */ -static int fts3tokColumnMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite3_context *pCtx, /* Context for sqlite3_result_xxx() calls */ - int iCol /* Index of column to read value from */ +SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( + int iAge, /* Segment "age". */ + int bLookup, /* True for a lookup only */ + sqlite3_int64 iStartLeaf, /* First leaf to traverse */ + sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ + sqlite3_int64 iEndBlock, /* Final block of segment */ + const char *zRoot, /* Buffer containing root node */ + int nRoot, /* Size of buffer containing root node */ + Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ ){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; + Fts3SegReader *pReader; /* Newly allocated SegReader object */ + int nExtra = 0; /* Bytes to allocate segment root node */ - /* CREATE TABLE x(input, token, start, end, position) */ - switch( iCol ){ - case 0: - sqlite3_result_text(pCtx, pCsr->zInput, -1, SQLITE_TRANSIENT); - break; - case 1: - sqlite3_result_text(pCtx, pCsr->zToken, pCsr->nToken, SQLITE_TRANSIENT); - break; - case 2: - sqlite3_result_int(pCtx, pCsr->iStart); - break; - case 3: - sqlite3_result_int(pCtx, pCsr->iEnd); - break; - default: - assert( iCol==4 ); - sqlite3_result_int(pCtx, pCsr->iPos); - break; + assert( iStartLeaf<=iEndLeaf ); + if( iStartLeaf==0 ){ + nExtra = nRoot + FTS3_NODE_PADDING; } - return SQLITE_OK; -} -/* -** xRowid - Return the current rowid for the cursor. -*/ -static int fts3tokRowidMethod( - sqlite3_vtab_cursor *pCursor, /* Cursor to retrieve value from */ - sqlite_int64 *pRowid /* OUT: Rowid value */ -){ - Fts3tokCursor *pCsr = (Fts3tokCursor *)pCursor; - *pRowid = (sqlite3_int64)pCsr->iRowid; + pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); + if( !pReader ){ + return SQLITE_NOMEM; + } + memset(pReader, 0, sizeof(Fts3SegReader)); + pReader->iIdx = iAge; + pReader->bLookup = bLookup!=0; + pReader->iStartBlock = iStartLeaf; + pReader->iLeafEndBlock = iEndLeaf; + pReader->iEndBlock = iEndBlock; + + if( nExtra ){ + /* The entire segment is stored in the root node. */ + pReader->aNode = (char *)&pReader[1]; + pReader->rootOnly = 1; + pReader->nNode = nRoot; + memcpy(pReader->aNode, zRoot, nRoot); + memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); + }else{ + pReader->iCurrentBlock = iStartLeaf-1; + } + *ppReader = pReader; return SQLITE_OK; } /* -** Register the fts3tok module with database connection db. Return SQLITE_OK -** if successful or an error code if sqlite3_create_module() fails. +** This is a comparison function used as a qsort() callback when sorting +** an array of pending terms by term. This occurs as part of flushing +** the contents of the pending-terms hash table to the database. */ -SQLITE_PRIVATE int sqlite3Fts3InitTok(sqlite3 *db, Fts3Hash *pHash){ - static const sqlite3_module fts3tok_module = { - 0, /* iVersion */ - fts3tokConnectMethod, /* xCreate */ - fts3tokConnectMethod, /* xConnect */ - fts3tokBestIndexMethod, /* xBestIndex */ - fts3tokDisconnectMethod, /* xDisconnect */ - fts3tokDisconnectMethod, /* xDestroy */ - fts3tokOpenMethod, /* xOpen */ - fts3tokCloseMethod, /* xClose */ - fts3tokFilterMethod, /* xFilter */ - fts3tokNextMethod, /* xNext */ - fts3tokEofMethod, /* xEof */ - fts3tokColumnMethod, /* xColumn */ - fts3tokRowidMethod, /* xRowid */ - 0, /* xUpdate */ - 0, /* xBegin */ - 0, /* xSync */ - 0, /* xCommit */ - 0, /* xRollback */ - 0, /* xFindFunction */ - 0, /* xRename */ - 0, /* xSavepoint */ - 0, /* xRelease */ - 0 /* xRollbackTo */ - }; - int rc; /* Return code */ +static int SQLITE_CDECL fts3CompareElemByTerm( + const void *lhs, + const void *rhs +){ + char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); + char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); + int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); + int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); - rc = sqlite3_create_module(db, "fts3tokenize", &fts3tok_module, (void*)pHash); - return rc; + int n = (n1<n2 ? n1 : n2); + int c = memcmp(z1, z2, n); + if( c==0 ){ + c = n1 - n2; + } + return c; } -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ - -/************** End of fts3_tokenize_vtab.c **********************************/ -/************** Begin file fts3_write.c **************************************/ /* -** 2009 Oct 23 +** This function is used to allocate an Fts3SegReader that iterates through +** a subset of the terms stored in the Fts3Table.pendingTerms array. ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** If the isPrefixIter parameter is zero, then the returned SegReader iterates +** through each term in the pending-terms table. Or, if isPrefixIter is +** non-zero, it iterates through each term and its prefixes. For example, if +** the pending terms hash table contains the terms "sqlite", "mysql" and +** "firebird", then the iterator visits the following 'terms' (in the order +** shown): ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** f fi fir fire fireb firebi firebir firebird +** m my mys mysq mysql +** s sq sql sqli sqlit sqlite ** -****************************************************************************** +** Whereas if isPrefixIter is zero, the terms visited are: ** -** This file is part of the SQLite FTS3 extension module. Specifically, -** this file contains code to insert, update and delete rows from FTS3 -** tables. It also contains code to merge FTS3 b-tree segments. Some -** of the sub-routines used to merge segments are also used by the query -** code in fts3.c. +** firebird mysql sqlite */ +SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( + Fts3Table *p, /* Virtual table handle */ + int iIndex, /* Index for p->aIndex */ + const char *zTerm, /* Term to search for */ + int nTerm, /* Size of buffer zTerm */ + int bPrefix, /* True for a prefix iterator */ + Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ +){ + Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ + Fts3HashElem *pE; /* Iterator variable */ + Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ + int nElem = 0; /* Size of array at aElem */ + int rc = SQLITE_OK; /* Return Code */ + Fts3Hash *pHash; -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + pHash = &p->aIndex[iIndex].hPending; + if( bPrefix ){ + int nAlloc = 0; /* Size of allocated array at aElem */ -/* #include <string.h> */ -/* #include <assert.h> */ -/* #include <stdlib.h> */ + for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ + char *zKey = (char *)fts3HashKey(pE); + int nKey = fts3HashKeysize(pE); + if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ + if( nElem==nAlloc ){ + Fts3HashElem **aElem2; + nAlloc += 16; + aElem2 = (Fts3HashElem **)sqlite3_realloc( + aElem, nAlloc*sizeof(Fts3HashElem *) + ); + if( !aElem2 ){ + rc = SQLITE_NOMEM; + nElem = 0; + break; + } + aElem = aElem2; + } + + aElem[nElem++] = pE; + } + } + /* If more than one term matches the prefix, sort the Fts3HashElem + ** objects in term order using qsort(). This uses the same comparison + ** callback as is used when flushing terms to disk. + */ + if( nElem>1 ){ + qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); + } -#define FTS_MAX_APPENDABLE_HEIGHT 16 + }else{ + /* The query is a simple term lookup that matches at most one term in + ** the index. All that is required is a straight hash-lookup. + ** + ** Because the stack address of pE may be accessed via the aElem pointer + ** below, the "Fts3HashElem *pE" must be declared so that it is valid + ** within this entire function, not just this "else{...}" block. + */ + pE = fts3HashFindElem(pHash, zTerm, nTerm); + if( pE ){ + aElem = &pE; + nElem = 1; + } + } + + if( nElem>0 ){ + int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); + pReader = (Fts3SegReader *)sqlite3_malloc(nByte); + if( !pReader ){ + rc = SQLITE_NOMEM; + }else{ + memset(pReader, 0, nByte); + pReader->iIdx = 0x7FFFFFFF; + pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; + memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); + } + } + + if( bPrefix ){ + sqlite3_free(aElem); + } + *ppReader = pReader; + return rc; +} /* -** When full-text index nodes are loaded from disk, the buffer that they -** are loaded into has the following number of bytes of padding at the end -** of it. i.e. if a full-text index node is 900 bytes in size, then a buffer -** of 920 bytes is allocated for it. +** Compare the entries pointed to by two Fts3SegReader structures. +** Comparison is as follows: ** -** This means that if we have a pointer into a buffer containing node data, -** it is always safe to read up to two varints from it without risking an -** overread, even if the node data is corrupted. +** 1) EOF is greater than not EOF. +** +** 2) The current terms (if any) are compared using memcmp(). If one +** term is a prefix of another, the longer term is considered the +** larger. +** +** 3) By segment age. An older segment is considered larger. */ -#define FTS3_NODE_PADDING (FTS3_VARINT_MAX*2) +static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc; + if( pLhs->aNode && pRhs->aNode ){ + int rc2 = pLhs->nTerm - pRhs->nTerm; + if( rc2<0 ){ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); + }else{ + rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); + } + if( rc==0 ){ + rc = rc2; + } + }else{ + rc = (pLhs->aNode==0) - (pRhs->aNode==0); + } + if( rc==0 ){ + rc = pRhs->iIdx - pLhs->iIdx; + } + assert( rc!=0 ); + return rc; +} /* -** Under certain circumstances, b-tree nodes (doclists) can be loaded into -** memory incrementally instead of all at once. This can be a big performance -** win (reduced IO and CPU) if SQLite stops calling the virtual table xNext() -** method before retrieving all query results (as may happen, for example, -** if a query has a LIMIT clause). +** A different comparison function for SegReader structures. In this +** version, it is assumed that each SegReader points to an entry in +** a doclist for identical terms. Comparison is made as follows: ** -** Incremental loading is used for b-tree nodes FTS3_NODE_CHUNK_THRESHOLD -** bytes and larger. Nodes are loaded in chunks of FTS3_NODE_CHUNKSIZE bytes. -** The code is written so that the hard lower-limit for each of these values -** is 1. Clearly such small values would be inefficient, but can be useful -** for testing purposes. +** 1) EOF (end of doclist in this case) is greater than not EOF. ** -** If this module is built with SQLITE_TEST defined, these constants may -** be overridden at runtime for testing purposes. File fts3_test.c contains -** a Tcl interface to read and write the values. +** 2) By current docid. +** +** 3) By segment age. An older segment is considered larger. */ -#ifdef SQLITE_TEST -int test_fts3_node_chunksize = (4*1024); -int test_fts3_node_chunk_threshold = (4*1024)*4; -# define FTS3_NODE_CHUNKSIZE test_fts3_node_chunksize -# define FTS3_NODE_CHUNK_THRESHOLD test_fts3_node_chunk_threshold -#else -# define FTS3_NODE_CHUNKSIZE (4*1024) -# define FTS3_NODE_CHUNK_THRESHOLD (FTS3_NODE_CHUNKSIZE*4) -#endif +static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; + } + } + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} +static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ + int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); + if( rc==0 ){ + if( pLhs->iDocid==pRhs->iDocid ){ + rc = pRhs->iIdx - pLhs->iIdx; + }else{ + rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; + } + } + assert( pLhs->aNode && pRhs->aNode ); + return rc; +} /* -** The two values that may be meaningfully bound to the :1 parameter in -** statements SQL_REPLACE_STAT and SQL_SELECT_STAT. +** Compare the term that the Fts3SegReader object passed as the first argument +** points to with the term specified by arguments zTerm and nTerm. +** +** If the pSeg iterator is already at EOF, return 0. Otherwise, return +** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are +** equal, or +ve if the pSeg term is greater than zTerm/nTerm. */ -#define FTS_STAT_DOCTOTAL 0 -#define FTS_STAT_INCRMERGEHINT 1 -#define FTS_STAT_AUTOINCRMERGE 2 +static int fts3SegReaderTermCmp( + Fts3SegReader *pSeg, /* Segment reader object */ + const char *zTerm, /* Term to compare to */ + int nTerm /* Size of term zTerm in bytes */ +){ + int res = 0; + if( pSeg->aNode ){ + if( pSeg->nTerm>nTerm ){ + res = memcmp(pSeg->zTerm, zTerm, nTerm); + }else{ + res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); + } + if( res==0 ){ + res = pSeg->nTerm-nTerm; + } + } + return res; +} /* -** If FTS_LOG_MERGES is defined, call sqlite3_log() to report each automatic -** and incremental merge operation that takes place. This is used for -** debugging FTS only, it should not usually be turned on in production -** systems. +** Argument apSegment is an array of nSegment elements. It is known that +** the final (nSegment-nSuspect) members are already in sorted order +** (according to the comparison function provided). This function shuffles +** the array around until all entries are in sorted order. */ -#ifdef FTS3_LOG_MERGES -static void fts3LogMerge(int nMerge, sqlite3_int64 iAbsLevel){ - sqlite3_log(SQLITE_OK, "%d-way merge from level %d", nMerge, (int)iAbsLevel); -} -#else -#define fts3LogMerge(x, y) -#endif +static void fts3SegReaderSort( + Fts3SegReader **apSegment, /* Array to sort entries of */ + int nSegment, /* Size of apSegment array */ + int nSuspect, /* Unsorted entry count */ + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ +){ + int i; /* Iterator variable */ + assert( nSuspect<=nSegment ); -typedef struct PendingList PendingList; -typedef struct SegmentNode SegmentNode; -typedef struct SegmentWriter SegmentWriter; + if( nSuspect==nSegment ) nSuspect--; + for(i=nSuspect-1; i>=0; i--){ + int j; + for(j=i; j<(nSegment-1); j++){ + Fts3SegReader *pTmp; + if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; + pTmp = apSegment[j+1]; + apSegment[j+1] = apSegment[j]; + apSegment[j] = pTmp; + } + } + +#ifndef NDEBUG + /* Check that the list really is sorted now. */ + for(i=0; i<(nSuspect-1); i++){ + assert( xCmp(apSegment[i], apSegment[i+1])<0 ); + } +#endif +} + +/* +** Insert a record into the %_segments table. +*/ +static int fts3WriteSegment( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iBlock, /* Block id for new block */ + char *z, /* Pointer to buffer containing block data */ + int n /* Size of buffer z in bytes */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iBlock); + sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + return rc; +} /* -** An instance of the following data structure is used to build doclists -** incrementally. See function fts3PendingListAppend() for details. +** Find the largest relative level number in the table. If successful, set +** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, +** set *pnMax to zero and return an SQLite error code. */ -struct PendingList { - int nData; - char *aData; - int nSpace; - sqlite3_int64 iLastDocid; - sqlite3_int64 iLastCol; - sqlite3_int64 iLastPos; -}; +SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ + int rc; + int mxLevel = 0; + sqlite3_stmt *pStmt = 0; + + rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + mxLevel = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_reset(pStmt); + } + *pnMax = mxLevel; + return rc; +} + +/* +** Insert a record into the %_segdir table. +*/ +static int fts3WriteSegdir( + Fts3Table *p, /* Virtual table handle */ + sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ + int iIdx, /* Value for "idx" field */ + sqlite3_int64 iStartBlock, /* Value for "start_block" field */ + sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ + sqlite3_int64 iEndBlock, /* Value for "end_block" field */ + sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ + char *zRoot, /* Blob value for "root" field */ + int nRoot /* Number of bytes in buffer zRoot */ +){ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pStmt, 1, iLevel); + sqlite3_bind_int(pStmt, 2, iIdx); + sqlite3_bind_int64(pStmt, 3, iStartBlock); + sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); + if( nLeafData==0 ){ + sqlite3_bind_int64(pStmt, 5, iEndBlock); + }else{ + char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); + if( !zEnd ) return SQLITE_NOMEM; + sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + } + sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + } + return rc; +} +/* +** Return the size of the common prefix (if any) shared by zPrev and +** zNext, in bytes. For example, +** +** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 +** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 +** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 +*/ +static int fts3PrefixCompress( + const char *zPrev, /* Buffer containing previous term */ + int nPrev, /* Size of buffer zPrev in bytes */ + const char *zNext, /* Buffer containing next term */ + int nNext /* Size of buffer zNext in bytes */ +){ + int n; + UNUSED_PARAMETER(nNext); + for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++); + return n; +} /* -** Each cursor has a (possibly empty) linked list of the following objects. +** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger +** (according to memcmp) than the previous term. */ -struct Fts3DeferredToken { - Fts3PhraseToken *pToken; /* Pointer to corresponding expr token */ - int iCol; /* Column token must occur in */ - Fts3DeferredToken *pNext; /* Next in list of deferred tokens */ - PendingList *pList; /* Doclist is assembled here */ -}; +static int fts3NodeAddTerm( + Fts3Table *p, /* Virtual table handle */ + SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */ + int isCopyTerm, /* True if zTerm/nTerm is transient */ + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm /* Size of term in bytes */ +){ + SegmentNode *pTree = *ppTree; + int rc; + SegmentNode *pNew; -/* -** An instance of this structure is used to iterate through the terms on -** a contiguous set of segment b-tree leaf nodes. Although the details of -** this structure are only manipulated by code in this file, opaque handles -** of type Fts3SegReader* are also used by code in fts3.c to iterate through -** terms when querying the full-text index. See functions: -** -** sqlite3Fts3SegReaderNew() -** sqlite3Fts3SegReaderFree() -** sqlite3Fts3SegReaderIterate() -** -** Methods used to manipulate Fts3SegReader structures: -** -** fts3SegReaderNext() -** fts3SegReaderFirstDocid() -** fts3SegReaderNextDocid() -*/ -struct Fts3SegReader { - int iIdx; /* Index within level, or 0x7FFFFFFF for PT */ - u8 bLookup; /* True for a lookup only */ - u8 rootOnly; /* True for a root-only reader */ + /* First try to append the term to the current node. Return early if + ** this is possible. + */ + if( pTree ){ + int nData = pTree->nData; /* Current size of node in bytes */ + int nReq = nData; /* Required space after adding zTerm */ + int nPrefix; /* Number of bytes of prefix compression */ + int nSuffix; /* Suffix length */ - sqlite3_int64 iStartBlock; /* Rowid of first leaf block to traverse */ - sqlite3_int64 iLeafEndBlock; /* Rowid of final leaf block to traverse */ - sqlite3_int64 iEndBlock; /* Rowid of final block in segment (or 0) */ - sqlite3_int64 iCurrentBlock; /* Current leaf block (or 0) */ + nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; - char *aNode; /* Pointer to node data (or NULL) */ - int nNode; /* Size of buffer at aNode (or 0) */ - int nPopulate; /* If >0, bytes of buffer aNode[] loaded */ - sqlite3_blob *pBlob; /* If not NULL, blob handle to read node */ + nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; + if( nReq<=p->nNodeSize || !pTree->zTerm ){ - Fts3HashElem **ppNextElem; + if( nReq>p->nNodeSize ){ + /* An unusual case: this is the first term to be added to the node + ** and the static node buffer (p->nNodeSize bytes) is not large + ** enough. Use a separately malloced buffer instead This wastes + ** p->nNodeSize bytes, but since this scenario only comes about when + ** the database contain two terms that share a prefix of almost 2KB, + ** this is not expected to be a serious problem. + */ + assert( pTree->aData==(char *)&pTree[1] ); + pTree->aData = (char *)sqlite3_malloc(nReq); + if( !pTree->aData ){ + return SQLITE_NOMEM; + } + } - /* Variables set by fts3SegReaderNext(). These may be read directly - ** by the caller. They are valid from the time SegmentReaderNew() returns - ** until SegmentReaderNext() returns something other than SQLITE_OK - ** (i.e. SQLITE_DONE). - */ - int nTerm; /* Number of bytes in current term */ - char *zTerm; /* Pointer to current term */ - int nTermAlloc; /* Allocated size of zTerm buffer */ - char *aDoclist; /* Pointer to doclist of current entry */ - int nDoclist; /* Size of doclist in current entry */ + if( pTree->zTerm ){ + /* There is no prefix-length field for first term in a node */ + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); + } - /* The following variables are used by fts3SegReaderNextDocid() to iterate - ** through the current doclist (aDoclist/nDoclist). + nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); + memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); + pTree->nData = nData + nSuffix; + pTree->nEntry++; + + if( isCopyTerm ){ + if( pTree->nMalloc<nTerm ){ + char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pTree->nMalloc = nTerm*2; + pTree->zMalloc = zNew; + } + pTree->zTerm = pTree->zMalloc; + memcpy(pTree->zTerm, zTerm, nTerm); + pTree->nTerm = nTerm; + }else{ + pTree->zTerm = (char *)zTerm; + pTree->nTerm = nTerm; + } + return SQLITE_OK; + } + } + + /* If control flows to here, it was not possible to append zTerm to the + ** current node. Create a new node (a right-sibling of the current node). + ** If this is the first node in the tree, the term is added to it. + ** + ** Otherwise, the term is not added to the new node, it is left empty for + ** now. Instead, the term is inserted into the parent of pTree. If pTree + ** has no parent, one is created here. */ - char *pOffsetList; - int nOffsetList; /* For descending pending seg-readers only */ - sqlite3_int64 iDocid; -}; + pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); + if( !pNew ){ + return SQLITE_NOMEM; + } + memset(pNew, 0, sizeof(SegmentNode)); + pNew->nData = 1 + FTS3_VARINT_MAX; + pNew->aData = (char *)&pNew[1]; -#define fts3SegReaderIsPending(p) ((p)->ppNextElem!=0) -#define fts3SegReaderIsRootOnly(p) ((p)->rootOnly!=0) + if( pTree ){ + SegmentNode *pParent = pTree->pParent; + rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); + if( pTree->pParent==0 ){ + pTree->pParent = pParent; + } + pTree->pRight = pNew; + pNew->pLeftmost = pTree->pLeftmost; + pNew->pParent = pParent; + pNew->zMalloc = pTree->zMalloc; + pNew->nMalloc = pTree->nMalloc; + pTree->zMalloc = 0; + }else{ + pNew->pLeftmost = pNew; + rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); + } + + *ppTree = pNew; + return rc; +} /* -** An instance of this structure is used to create a segment b-tree in the -** database. The internal details of this type are only accessed by the -** following functions: -** -** fts3SegWriterAdd() -** fts3SegWriterFlush() -** fts3SegWriterFree() +** Helper function for fts3NodeWrite(). */ -struct SegmentWriter { - SegmentNode *pTree; /* Pointer to interior tree structure */ - sqlite3_int64 iFirst; /* First slot in %_segments written */ - sqlite3_int64 iFree; /* Next free slot in %_segments */ - char *zTerm; /* Pointer to previous term buffer */ - int nTerm; /* Number of bytes in zTerm */ - int nMalloc; /* Size of malloc'd buffer at zMalloc */ - char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ - int nSize; /* Size of allocation at aData */ - int nData; /* Bytes of data in aData */ - char *aData; /* Pointer to block from malloc() */ - i64 nLeafData; /* Number of bytes of leaf data written */ -}; +static int fts3TreeFinishNode( + SegmentNode *pTree, + int iHeight, + sqlite3_int64 iLeftChild +){ + int nStart; + assert( iHeight>=1 && iHeight<128 ); + nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); + pTree->aData[nStart] = (char)iHeight; + sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); + return nStart; +} /* -** Type SegmentNode is used by the following three functions to create -** the interior part of the segment b+-tree structures (everything except -** the leaf nodes). These functions and type are only ever used by code -** within the fts3SegWriterXXX() family of functions described above. +** Write the buffer for the segment node pTree and all of its peers to the +** database. Then call this function recursively to write the parent of +** pTree and its peers to the database. ** -** fts3NodeAddTerm() -** fts3NodeWrite() -** fts3NodeFree() +** Except, if pTree is a root node, do not write it to the database. Instead, +** set output variables *paRoot and *pnRoot to contain the root node. ** -** When a b+tree is written to the database (either as a result of a merge -** or the pending-terms table being flushed), leaves are written into the -** database file as soon as they are completely populated. The interior of -** the tree is assembled in memory and written out only once all leaves have -** been populated and stored. This is Ok, as the b+-tree fanout is usually -** very large, meaning that the interior of the tree consumes relatively -** little memory. +** If successful, SQLITE_OK is returned and output variable *piLast is +** set to the largest blockid written to the database (or zero if no +** blocks were written to the db). Otherwise, an SQLite error code is +** returned. */ -struct SegmentNode { - SegmentNode *pParent; /* Parent node (or NULL for root node) */ - SegmentNode *pRight; /* Pointer to right-sibling */ - SegmentNode *pLeftmost; /* Pointer to left-most node of this depth */ - int nEntry; /* Number of terms written to node so far */ - char *zTerm; /* Pointer to previous term buffer */ - int nTerm; /* Number of bytes in zTerm */ - int nMalloc; /* Size of malloc'd buffer at zMalloc */ - char *zMalloc; /* Malloc'd space (possibly) used for zTerm */ - int nData; /* Bytes of valid data so far */ - char *aData; /* Node data */ -}; +static int fts3NodeWrite( + Fts3Table *p, /* Virtual table handle */ + SegmentNode *pTree, /* SegmentNode handle */ + int iHeight, /* Height of this node in tree */ + sqlite3_int64 iLeaf, /* Block id of first leaf node */ + sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ + sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ + char **paRoot, /* OUT: Data for root node */ + int *pnRoot /* OUT: Size of root node in bytes */ +){ + int rc = SQLITE_OK; -/* -** Valid values for the second argument to fts3SqlStmt(). -*/ -#define SQL_DELETE_CONTENT 0 -#define SQL_IS_EMPTY 1 -#define SQL_DELETE_ALL_CONTENT 2 -#define SQL_DELETE_ALL_SEGMENTS 3 -#define SQL_DELETE_ALL_SEGDIR 4 -#define SQL_DELETE_ALL_DOCSIZE 5 -#define SQL_DELETE_ALL_STAT 6 -#define SQL_SELECT_CONTENT_BY_ROWID 7 -#define SQL_NEXT_SEGMENT_INDEX 8 -#define SQL_INSERT_SEGMENTS 9 -#define SQL_NEXT_SEGMENTS_ID 10 -#define SQL_INSERT_SEGDIR 11 -#define SQL_SELECT_LEVEL 12 -#define SQL_SELECT_LEVEL_RANGE 13 -#define SQL_SELECT_LEVEL_COUNT 14 -#define SQL_SELECT_SEGDIR_MAX_LEVEL 15 -#define SQL_DELETE_SEGDIR_LEVEL 16 -#define SQL_DELETE_SEGMENTS_RANGE 17 -#define SQL_CONTENT_INSERT 18 -#define SQL_DELETE_DOCSIZE 19 -#define SQL_REPLACE_DOCSIZE 20 -#define SQL_SELECT_DOCSIZE 21 -#define SQL_SELECT_STAT 22 -#define SQL_REPLACE_STAT 23 + if( !pTree->pParent ){ + /* Root node of the tree. */ + int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); + *piLast = iFree-1; + *pnRoot = pTree->nData - nStart; + *paRoot = &pTree->aData[nStart]; + }else{ + SegmentNode *pIter; + sqlite3_int64 iNextFree = iFree; + sqlite3_int64 iNextLeaf = iLeaf; + for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ + int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); + int nWrite = pIter->nData - nStart; + + rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); + iNextFree++; + iNextLeaf += (pIter->nEntry+1); + } + if( rc==SQLITE_OK ){ + assert( iNextLeaf==iFree ); + rc = fts3NodeWrite( + p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot + ); + } + } -#define SQL_SELECT_ALL_PREFIX_LEVEL 24 -#define SQL_DELETE_ALL_TERMS_SEGDIR 25 -#define SQL_DELETE_SEGDIR_RANGE 26 -#define SQL_SELECT_ALL_LANGID 27 -#define SQL_FIND_MERGE_LEVEL 28 -#define SQL_MAX_LEAF_NODE_ESTIMATE 29 -#define SQL_DELETE_SEGDIR_ENTRY 30 -#define SQL_SHIFT_SEGDIR_ENTRY 31 -#define SQL_SELECT_SEGDIR 32 -#define SQL_CHOMP_SEGDIR 33 -#define SQL_SEGMENT_IS_APPENDABLE 34 -#define SQL_SELECT_INDEXES 35 -#define SQL_SELECT_MXLEVEL 36 + return rc; +} -#define SQL_SELECT_LEVEL_RANGE2 37 -#define SQL_UPDATE_LEVEL_IDX 38 -#define SQL_UPDATE_LEVEL 39 +/* +** Free all memory allocations associated with the tree pTree. +*/ +static void fts3NodeFree(SegmentNode *pTree){ + if( pTree ){ + SegmentNode *p = pTree->pLeftmost; + fts3NodeFree(p->pParent); + while( p ){ + SegmentNode *pRight = p->pRight; + if( p->aData!=(char *)&p[1] ){ + sqlite3_free(p->aData); + } + assert( pRight==0 || p->zMalloc==0 ); + sqlite3_free(p->zMalloc); + sqlite3_free(p); + p = pRight; + } + } +} /* -** This function is used to obtain an SQLite prepared statement handle -** for the statement identified by the second argument. If successful, -** *pp is set to the requested statement handle and SQLITE_OK returned. -** Otherwise, an SQLite error code is returned and *pp is set to 0. +** Add a term to the segment being constructed by the SegmentWriter object +** *ppWriter. When adding the first term to a segment, *ppWriter should +** be passed NULL. This function will allocate a new SegmentWriter object +** and return it via the input/output variable *ppWriter in this case. ** -** If argument apVal is not NULL, then it must point to an array with -** at least as many entries as the requested statement has bound -** parameters. The values are bound to the statements parameters before -** returning. +** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. */ -static int fts3SqlStmt( +static int fts3SegWriterAdd( Fts3Table *p, /* Virtual table handle */ - int eStmt, /* One of the SQL_XXX constants above */ - sqlite3_stmt **pp, /* OUT: Statement handle */ - sqlite3_value **apVal /* Values to bind to statement */ + SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ + int isCopyTerm, /* True if buffer zTerm must be copied */ + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of term in bytes */ + const char *aDoclist, /* Pointer to buffer containing doclist */ + int nDoclist /* Size of doclist in bytes */ ){ - const char *azSql[] = { -/* 0 */ "DELETE FROM %Q.'%q_content' WHERE rowid = ?", -/* 1 */ "SELECT NOT EXISTS(SELECT docid FROM %Q.'%q_content' WHERE rowid!=?)", -/* 2 */ "DELETE FROM %Q.'%q_content'", -/* 3 */ "DELETE FROM %Q.'%q_segments'", -/* 4 */ "DELETE FROM %Q.'%q_segdir'", -/* 5 */ "DELETE FROM %Q.'%q_docsize'", -/* 6 */ "DELETE FROM %Q.'%q_stat'", -/* 7 */ "SELECT %s WHERE rowid=?", -/* 8 */ "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1", -/* 9 */ "REPLACE INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)", -/* 10 */ "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)", -/* 11 */ "REPLACE INTO %Q.'%q_segdir' VALUES(?,?,?,?,?,?)", - - /* Return segments in order from oldest to newest.*/ -/* 12 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level = ? ORDER BY idx ASC", -/* 13 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?" - "ORDER BY level DESC, idx ASC", - -/* 14 */ "SELECT count(*) FROM %Q.'%q_segdir' WHERE level = ?", -/* 15 */ "SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ + int nReq; /* Number of bytes required on leaf page */ + int nData; + SegmentWriter *pWriter = *ppWriter; -/* 16 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ?", -/* 17 */ "DELETE FROM %Q.'%q_segments' WHERE blockid BETWEEN ? AND ?", -/* 18 */ "INSERT INTO %Q.'%q_content' VALUES(%s)", -/* 19 */ "DELETE FROM %Q.'%q_docsize' WHERE docid = ?", -/* 20 */ "REPLACE INTO %Q.'%q_docsize' VALUES(?,?)", -/* 21 */ "SELECT size FROM %Q.'%q_docsize' WHERE docid=?", -/* 22 */ "SELECT value FROM %Q.'%q_stat' WHERE id=?", -/* 23 */ "REPLACE INTO %Q.'%q_stat' VALUES(?,?)", -/* 24 */ "", -/* 25 */ "", + if( !pWriter ){ + int rc; + sqlite3_stmt *pStmt; -/* 26 */ "DELETE FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ?", -/* 27 */ "SELECT ? UNION SELECT level / (1024 * ?) FROM %Q.'%q_segdir'", + /* Allocate the SegmentWriter structure */ + pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); + if( !pWriter ) return SQLITE_NOMEM; + memset(pWriter, 0, sizeof(SegmentWriter)); + *ppWriter = pWriter; -/* This statement is used to determine which level to read the input from -** when performing an incremental merge. It returns the absolute level number -** of the oldest level in the db that contains at least ? segments. Or, -** if no level in the FTS index contains more than ? segments, the statement -** returns zero rows. */ -/* 28 */ "SELECT level FROM %Q.'%q_segdir' GROUP BY level HAVING count(*)>=?" - " ORDER BY (level %% 1024) ASC LIMIT 1", + /* Allocate a buffer in which to accumulate data */ + pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); + if( !pWriter->aData ) return SQLITE_NOMEM; + pWriter->nSize = p->nNodeSize; -/* Estimate the upper limit on the number of leaf nodes in a new segment -** created by merging the oldest :2 segments from absolute level :1. See -** function sqlite3Fts3Incrmerge() for details. */ -/* 29 */ "SELECT 2 * total(1 + leaves_end_block - start_block) " - " FROM %Q.'%q_segdir' WHERE level = ? AND idx < ?", + /* Find the next free blockid in the %_segments table */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + pWriter->iFree = sqlite3_column_int64(pStmt, 0); + pWriter->iFirst = pWriter->iFree; + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ) return rc; + } + nData = pWriter->nData; -/* SQL_DELETE_SEGDIR_ENTRY -** Delete the %_segdir entry on absolute level :1 with index :2. */ -/* 30 */ "DELETE FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); + nSuffix = nTerm-nPrefix; -/* SQL_SHIFT_SEGDIR_ENTRY -** Modify the idx value for the segment with idx=:3 on absolute level :2 -** to :1. */ -/* 31 */ "UPDATE %Q.'%q_segdir' SET idx = ? WHERE level=? AND idx=?", + /* Figure out how many bytes are required by this new entry */ + nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ + nSuffix + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ -/* SQL_SELECT_SEGDIR -** Read a single entry from the %_segdir table. The entry from absolute -** level :1 with index value :2. */ -/* 32 */ "SELECT idx, start_block, leaves_end_block, end_block, root " - "FROM %Q.'%q_segdir' WHERE level = ? AND idx = ?", + if( nData>0 && nData+nReq>p->nNodeSize ){ + int rc; -/* SQL_CHOMP_SEGDIR -** Update the start_block (:1) and root (:2) fields of the %_segdir -** entry located on absolute level :3 with index :4. */ -/* 33 */ "UPDATE %Q.'%q_segdir' SET start_block = ?, root = ?" - "WHERE level = ? AND idx = ?", + /* The current leaf node is full. Write it out to the database. */ + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); + if( rc!=SQLITE_OK ) return rc; + p->nLeafAdd++; -/* SQL_SEGMENT_IS_APPENDABLE -** Return a single row if the segment with end_block=? is appendable. Or -** no rows otherwise. */ -/* 34 */ "SELECT 1 FROM %Q.'%q_segments' WHERE blockid=? AND block IS NULL", + /* Add the current term to the interior node tree. The term added to + ** the interior tree must: + ** + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pWriter->zTerm), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. + */ + assert( nPrefix<nTerm ); + rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1); + if( rc!=SQLITE_OK ) return rc; -/* SQL_SELECT_INDEXES -** Return the list of valid segment indexes for absolute level ? */ -/* 35 */ "SELECT idx FROM %Q.'%q_segdir' WHERE level=? ORDER BY 1 ASC", + nData = 0; + pWriter->nTerm = 0; -/* SQL_SELECT_MXLEVEL -** Return the largest relative level in the FTS index or indexes. */ -/* 36 */ "SELECT max( level %% 1024 ) FROM %Q.'%q_segdir'", + nPrefix = 0; + nSuffix = nTerm; + nReq = 1 + /* varint containing prefix size */ + sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ + nTerm + /* Term suffix */ + sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ + nDoclist; /* Doclist data */ + } - /* Return segments in order from oldest to newest.*/ -/* 37 */ "SELECT level, idx, end_block " - "FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? " - "ORDER BY level DESC, idx ASC", + /* Increase the total number of bytes written to account for the new entry. */ + pWriter->nLeafData += nReq; - /* Update statements used while promoting segments */ -/* 38 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=-1,idx=? " - "WHERE level=? AND idx=?", -/* 39 */ "UPDATE OR FAIL %Q.'%q_segdir' SET level=? WHERE level=-1" + /* If the buffer currently allocated is too small for this entry, realloc + ** the buffer to make it large enough. + */ + if( nReq>pWriter->nSize ){ + char *aNew = sqlite3_realloc(pWriter->aData, nReq); + if( !aNew ) return SQLITE_NOMEM; + pWriter->aData = aNew; + pWriter->nSize = nReq; + } + assert( nData+nReq<=pWriter->nSize ); - }; - int rc = SQLITE_OK; - sqlite3_stmt *pStmt; + /* Append the prefix-compressed term and doclist to the buffer. */ + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); + memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); + nData += nSuffix; + nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); + memcpy(&pWriter->aData[nData], aDoclist, nDoclist); + pWriter->nData = nData + nDoclist; - assert( SizeofArray(azSql)==SizeofArray(p->aStmt) ); - assert( eStmt<SizeofArray(azSql) && eStmt>=0 ); - - pStmt = p->aStmt[eStmt]; - if( !pStmt ){ - char *zSql; - if( eStmt==SQL_CONTENT_INSERT ){ - zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist); - }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){ - zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist); - }else{ - zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName); - } - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, NULL); - sqlite3_free(zSql); - assert( rc==SQLITE_OK || pStmt==0 ); - p->aStmt[eStmt] = pStmt; - } - } - if( apVal ){ - int i; - int nParam = sqlite3_bind_parameter_count(pStmt); - for(i=0; rc==SQLITE_OK && i<nParam; i++){ - rc = sqlite3_bind_value(pStmt, i+1, apVal[i]); + /* Save the current term so that it can be used to prefix-compress the next. + ** If the isCopyTerm parameter is true, then the buffer pointed to by + ** zTerm is transient, so take a copy of the term data. Otherwise, just + ** store a copy of the pointer. + */ + if( isCopyTerm ){ + if( nTerm>pWriter->nMalloc ){ + char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); + if( !zNew ){ + return SQLITE_NOMEM; + } + pWriter->nMalloc = nTerm*2; + pWriter->zMalloc = zNew; + pWriter->zTerm = zNew; } + assert( pWriter->zTerm==pWriter->zMalloc ); + memcpy(pWriter->zTerm, zTerm, nTerm); + }else{ + pWriter->zTerm = (char *)zTerm; } - *pp = pStmt; - return rc; -} + pWriter->nTerm = nTerm; + return SQLITE_OK; +} -static int fts3SelectDocsize( - Fts3Table *pTab, /* FTS3 table handle */ - sqlite3_int64 iDocid, /* Docid to bind for SQL_SELECT_DOCSIZE */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ +/* +** Flush all data associated with the SegmentWriter object pWriter to the +** database. This function must be called after all terms have been added +** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is +** returned. Otherwise, an SQLite error code. +*/ +static int fts3SegWriterFlush( + Fts3Table *p, /* Virtual table handle */ + SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ + sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ + int iIdx /* Value for 'idx' column of %_segdir */ ){ - sqlite3_stmt *pStmt = 0; /* Statement requested from fts3SqlStmt() */ int rc; /* Return code */ + if( pWriter->pTree ){ + sqlite3_int64 iLast = 0; /* Largest block id written to database */ + sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ + char *zRoot = NULL; /* Pointer to buffer containing root node */ + int nRoot = 0; /* Size of buffer zRoot */ - rc = fts3SqlStmt(pTab, SQL_SELECT_DOCSIZE, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iDocid); - rc = sqlite3_step(pStmt); - if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){ - rc = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; - pStmt = 0; - }else{ - rc = SQLITE_OK; + iLastLeaf = pWriter->iFree; + rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); + if( rc==SQLITE_OK ){ + rc = fts3NodeWrite(p, pWriter->pTree, 1, + pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, iLevel, iIdx, + pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); } + }else{ + /* The entire tree fits on the root node. Write it to the segdir table. */ + rc = fts3WriteSegdir(p, iLevel, iIdx, + 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); } - - *ppStmt = pStmt; + p->nLeafAdd++; return rc; } -SQLITE_PRIVATE int sqlite3Fts3SelectDoctotal( - Fts3Table *pTab, /* Fts3 table handle */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ -){ - sqlite3_stmt *pStmt = 0; +/* +** Release all memory held by the SegmentWriter object passed as the +** first argument. +*/ +static void fts3SegWriterFree(SegmentWriter *pWriter){ + if( pWriter ){ + sqlite3_free(pWriter->aData); + sqlite3_free(pWriter->zMalloc); + fts3NodeFree(pWriter->pTree); + sqlite3_free(pWriter); + } +} + +/* +** The first value in the apVal[] array is assumed to contain an integer. +** This function tests if there exist any documents with docid values that +** are different from that integer. i.e. if deleting the document with docid +** pRowid would mean the FTS3 table were empty. +** +** If successful, *pisEmpty is set to true if the table is empty except for +** document pRowid, or false otherwise, and SQLITE_OK is returned. If an +** error occurs, an SQLite error code is returned. +*/ +static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ + sqlite3_stmt *pStmt; int rc; - rc = fts3SqlStmt(pTab, SQL_SELECT_STAT, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - if( sqlite3_step(pStmt)!=SQLITE_ROW - || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB - ){ + if( p->zContentTbl ){ + /* If using the content=xxx option, assume the table is never empty */ + *pisEmpty = 0; + rc = SQLITE_OK; + }else{ + rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pisEmpty = sqlite3_column_int(pStmt, 0); + } rc = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB; - pStmt = 0; } } - *ppStmt = pStmt; return rc; } -SQLITE_PRIVATE int sqlite3Fts3SelectDocsize( - Fts3Table *pTab, /* Fts3 table handle */ - sqlite3_int64 iDocid, /* Docid to read size data for */ - sqlite3_stmt **ppStmt /* OUT: Statement handle */ -){ - return fts3SelectDocsize(pTab, iDocid, ppStmt); -} - /* -** Similar to fts3SqlStmt(). Except, after binding the parameters in -** array apVal[] to the SQL statement identified by eStmt, the statement -** is executed. +** Set *pnMax to the largest segment level in the database for the index +** iIndex. ** -** Returns SQLITE_OK if the statement is successfully executed, or an -** SQLite error code otherwise. +** Segment levels are stored in the 'level' column of the %_segdir table. +** +** Return SQLITE_OK if successful, or an SQLite error code if not. */ -static void fts3SqlExec( - int *pRC, /* Result code */ - Fts3Table *p, /* The FTS3 table */ - int eStmt, /* Index of statement to evaluate */ - sqlite3_value **apVal /* Parameters to bind */ +static int fts3SegmentMaxLevel( + Fts3Table *p, + int iLangid, + int iIndex, + sqlite3_int64 *pnMax ){ sqlite3_stmt *pStmt; int rc; - if( *pRC ) return; - rc = fts3SqlStmt(p, eStmt, &pStmt, apVal); - if( rc==SQLITE_OK ){ - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); + assert( iIndex>=0 && iIndex<p->nIndex ); + + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pStmt, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pnMax = sqlite3_column_int64(pStmt, 0); } - *pRC = rc; + return sqlite3_reset(pStmt); } +/* +** iAbsLevel is an absolute level that may be assumed to exist within +** the database. This function checks if it is the largest level number +** within its index. Assuming no error occurs, *pbMax is set to 1 if +** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK +** is returned. If an error occurs, an error code is returned and the +** final value of *pbMax is undefined. +*/ +static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ + + /* Set pStmt to the compiled version of: + ** + ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? + ** + ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). + */ + sqlite3_stmt *pStmt; + int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); + if( rc!=SQLITE_OK ) return rc; + sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); + sqlite3_bind_int64(pStmt, 2, + ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL + ); + + *pbMax = 0; + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; + } + return sqlite3_reset(pStmt); +} /* -** This function ensures that the caller has obtained an exclusive -** shared-cache table-lock on the %_segdir table. This is required before -** writing data to the fts3 table. If this lock is not acquired first, then -** the caller may end up attempting to take this lock as part of committing -** a transaction, causing SQLite to return SQLITE_LOCKED or -** LOCKED_SHAREDCACHEto a COMMIT command. -** -** It is best to avoid this because if FTS3 returns any error when -** committing a transaction, the whole transaction will be rolled back. -** And this is not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. -** It can still happen if the user locks the underlying tables directly -** instead of accessing them via FTS. +** Delete all entries in the %_segments table associated with the segment +** opened with seg-reader pSeg. This function does not affect the contents +** of the %_segdir table. */ -static int fts3Writelock(Fts3Table *p){ - int rc = SQLITE_OK; - - if( p->nPendingData==0 ){ - sqlite3_stmt *pStmt; - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pStmt, 0); +static int fts3DeleteSegment( + Fts3Table *p, /* FTS table handle */ + Fts3SegReader *pSeg /* Segment to delete */ +){ + int rc = SQLITE_OK; /* Return code */ + if( pSeg->iStartBlock ){ + sqlite3_stmt *pDelete; /* SQL statement to delete rows */ + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_null(pStmt, 1); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); + sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); + sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); } } - return rc; } /* -** FTS maintains a separate indexes for each language-id (a 32-bit integer). -** Within each language id, a separate index is maintained to store the -** document terms, and each configured prefix size (configured the FTS -** "prefix=" option). And each index consists of multiple levels ("relative -** levels"). -** -** All three of these values (the language id, the specific index and the -** level within the index) are encoded in 64-bit integer values stored -** in the %_segdir table on disk. This function is used to convert three -** separate component values into the single 64-bit integer value that -** can be used to query the %_segdir table. +** This function is used after merging multiple segments into a single large +** segment to delete the old, now redundant, segment b-trees. Specifically, +** it: +** +** 1) Deletes all %_segments entries for the segments associated with +** each of the SegReader objects in the array passed as the third +** argument, and ** -** Specifically, each language-id/index combination is allocated 1024 -** 64-bit integer level values ("absolute levels"). The main terms index -** for language-id 0 is allocate values 0-1023. The first prefix index -** (if any) for language-id 0 is allocated values 1024-2047. And so on. -** Language 1 indexes are allocated immediately following language 0. +** 2) deletes all %_segdir entries with level iLevel, or all %_segdir +** entries regardless of level if (iLevel<0). ** -** So, for a system with nPrefix prefix indexes configured, the block of -** absolute levels that corresponds to language-id iLangid and index -** iIndex starts at absolute level ((iLangid * (nPrefix+1) + iIndex) * 1024). +** SQLITE_OK is returned if successful, otherwise an SQLite error code. */ -static sqlite3_int64 getAbsoluteLevel( - Fts3Table *p, /* FTS3 table handle */ +static int fts3DeleteSegdir( + Fts3Table *p, /* Virtual table handle */ int iLangid, /* Language id */ - int iIndex, /* Index in p->aIndex[] */ - int iLevel /* Level of segments */ + int iIndex, /* Index for p->aIndex */ + int iLevel, /* Level of %_segdir entries to delete */ + Fts3SegReader **apSegment, /* Array of SegReader objects */ + int nReader /* Size of array apSegment */ ){ - sqlite3_int64 iBase; /* First absolute level for iLangid/iIndex */ - assert( iLangid>=0 ); - assert( p->nIndex>0 ); - assert( iIndex>=0 && iIndex<p->nIndex ); + int rc = SQLITE_OK; /* Return Code */ + int i; /* Iterator variable */ + sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ - iBase = ((sqlite3_int64)iLangid * p->nIndex + iIndex) * FTS3_SEGDIR_MAXLEVEL; - return iBase + iLevel; + for(i=0; rc==SQLITE_OK && i<nReader; i++){ + rc = fts3DeleteSegment(p, apSegment[i]); + } + if( rc!=SQLITE_OK ){ + return rc; + } + + assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); + if( iLevel==FTS3_SEGCURSOR_ALL ){ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); + sqlite3_bind_int64(pDelete, 2, + getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) + ); + } + }else{ + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64( + pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) + ); + } + } + + if( rc==SQLITE_OK ){ + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } + + return rc; } /* -** Set *ppStmt to a statement handle that may be used to iterate through -** all rows in the %_segdir table, from oldest to newest. If successful, -** return SQLITE_OK. If an error occurs while preparing the statement, -** return an SQLite error code. -** -** There is only ever one instance of this SQL statement compiled for -** each FTS3 table. +** When this function is called, buffer *ppList (size *pnList bytes) contains +** a position list that may (or may not) feature multiple columns. This +** function adjusts the pointer *ppList and the length *pnList so that they +** identify the subset of the position list that corresponds to column iCol. ** -** The statement returns the following columns from the %_segdir table: +** If there are no entries in the input position list for column iCol, then +** *pnList is set to zero before returning. ** -** 0: idx -** 1: start_block -** 2: leaves_end_block -** 3: end_block -** 4: root +** If parameter bZero is non-zero, then any part of the input list following +** the end of the output list is zeroed before returning. */ -SQLITE_PRIVATE int sqlite3Fts3AllSegdirs( - Fts3Table *p, /* FTS3 table */ - int iLangid, /* Language being queried */ - int iIndex, /* Index for p->aIndex[] */ - int iLevel, /* Level to select (relative level) */ - sqlite3_stmt **ppStmt /* OUT: Compiled statement */ +static void fts3ColumnFilter( + int iCol, /* Column to filter on */ + int bZero, /* Zero out anything following *ppList */ + char **ppList, /* IN/OUT: Pointer to position list */ + int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ ){ - int rc; - sqlite3_stmt *pStmt = 0; - - assert( iLevel==FTS3_SEGCURSOR_ALL || iLevel>=0 ); - assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); - assert( iIndex>=0 && iIndex<p->nIndex ); + char *pList = *ppList; + int nList = *pnList; + char *pEnd = &pList[nList]; + int iCurrent = 0; + char *p = pList; - if( iLevel<0 ){ - /* "SELECT * FROM %_segdir WHERE level BETWEEN ? AND ? ORDER BY ..." */ - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pStmt, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); + assert( iCol>=0 ); + while( 1 ){ + char c = 0; + while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80; + + if( iCol==iCurrent ){ + nList = (int)(p - pList); + break; } - }else{ - /* "SELECT * FROM %_segdir WHERE level = ? ORDER BY ..." */ - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex,iLevel)); + + nList -= (int)(p - pList); + pList = p; + if( nList==0 ){ + break; } + p = &pList[1]; + p += fts3GetVarint32(p, &iCurrent); } - *ppStmt = pStmt; - return rc; + + if( bZero && &pList[nList]!=pEnd ){ + memset(&pList[nList], 0, pEnd - &pList[nList]); + } + *ppList = pList; + *pnList = nList; +} + +/* +** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any +** existing data). Grow the buffer if required. +** +** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered +** trying to resize the buffer, return SQLITE_NOMEM. +*/ +static int fts3MsrBufferData( + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + char *pList, + int nList +){ + if( nList>pMsr->nBuffer ){ + char *pNew; + pMsr->nBuffer = nList*2; + pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); + if( !pNew ) return SQLITE_NOMEM; + pMsr->aBuffer = pNew; + } + + memcpy(pMsr->aBuffer, pList, nList); + return SQLITE_OK; } +SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ + sqlite3_int64 *piDocid, /* OUT: Docid value */ + char **paPoslist, /* OUT: Pointer to position list */ + int *pnPoslist /* OUT: Size of position list in bytes */ +){ + int nMerge = pMsr->nAdvance; + Fts3SegReader **apSegment = pMsr->apSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + if( nMerge==0 ){ + *paPoslist = 0; + return SQLITE_OK; + } + + while( 1 ){ + Fts3SegReader *pSeg; + pSeg = pMsr->apSegment[0]; + + if( pSeg->pOffsetList==0 ){ + *paPoslist = 0; + break; + }else{ + int rc; + char *pList; + int nList; + int j; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + + rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( rc==SQLITE_OK + && j<nMerge + && apSegment[j]->pOffsetList + && apSegment[j]->iDocid==iDocid + ){ + rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + if( rc!=SQLITE_OK ) return rc; + fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); + + if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pMsr, pList, nList+1); + if( rc!=SQLITE_OK ) return rc; + assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); + pList = pMsr->aBuffer; + } -/* -** Append a single varint to a PendingList buffer. SQLITE_OK is returned -** if successful, or an SQLite error code otherwise. -** -** This function also serves to allocate the PendingList structure itself. -** For example, to create a new PendingList structure containing two -** varints: -** -** PendingList *p = 0; -** fts3PendingListAppendVarint(&p, 1); -** fts3PendingListAppendVarint(&p, 2); -*/ -static int fts3PendingListAppendVarint( - PendingList **pp, /* IN/OUT: Pointer to PendingList struct */ - sqlite3_int64 i /* Value to append to data */ -){ - PendingList *p = *pp; + if( pMsr->iColFilter>=0 ){ + fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); + } - /* Allocate or grow the PendingList as required. */ - if( !p ){ - p = sqlite3_malloc(sizeof(*p) + 100); - if( !p ){ - return SQLITE_NOMEM; - } - p->nSpace = 100; - p->aData = (char *)&p[1]; - p->nData = 0; - } - else if( p->nData+FTS3_VARINT_MAX+1>p->nSpace ){ - int nNew = p->nSpace * 2; - p = sqlite3_realloc(p, sizeof(*p) + nNew); - if( !p ){ - sqlite3_free(*pp); - *pp = 0; - return SQLITE_NOMEM; + if( nList>0 ){ + *paPoslist = pList; + *piDocid = iDocid; + *pnPoslist = nList; + break; + } } - p->nSpace = nNew; - p->aData = (char *)&p[1]; } - /* Append the new serialized varint to the end of the list. */ - p->nData += sqlite3Fts3PutVarint(&p->aData[p->nData], i); - p->aData[p->nData] = '\0'; - *pp = p; return SQLITE_OK; } -/* -** Add a docid/column/position entry to a PendingList structure. Non-zero -** is returned if the structure is sqlite3_realloced as part of adding -** the entry. Otherwise, zero. -** -** If an OOM error occurs, *pRc is set to SQLITE_NOMEM before returning. -** Zero is always returned in this case. Otherwise, if no OOM error occurs, -** it is set to SQLITE_OK. -*/ -static int fts3PendingListAppend( - PendingList **pp, /* IN/OUT: PendingList structure */ - sqlite3_int64 iDocid, /* Docid for entry to add */ - sqlite3_int64 iCol, /* Column for entry to add */ - sqlite3_int64 iPos, /* Position of term for entry to add */ - int *pRc /* OUT: Return code */ +static int fts3SegReaderStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + const char *zTerm, /* Term searched for (or NULL) */ + int nTerm /* Length of zTerm in bytes */ ){ - PendingList *p = *pp; - int rc = SQLITE_OK; + int i; + int nSeg = pCsr->nSegment; - assert( !p || p->iLastDocid<=iDocid ); + /* If the Fts3SegFilter defines a specific term (or term prefix) to search + ** for, then advance each segment iterator until it points to a term of + ** equal or greater value than the specified term. This prevents many + ** unnecessary merge/sort operations for the case where single segment + ** b-tree leaf nodes contain more than one term. + */ + for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){ + int res = 0; + Fts3SegReader *pSeg = pCsr->apSegment[i]; + do { + int rc = fts3SegReaderNext(p, pSeg, 0); + if( rc!=SQLITE_OK ) return rc; + }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); - if( !p || p->iLastDocid!=iDocid ){ - sqlite3_int64 iDelta = iDocid - (p ? p->iLastDocid : 0); - if( p ){ - assert( p->nData<p->nSpace ); - assert( p->aData[p->nData]==0 ); - p->nData++; - } - if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iDelta)) ){ - goto pendinglistappend_out; - } - p->iLastCol = -1; - p->iLastPos = 0; - p->iLastDocid = iDocid; - } - if( iCol>0 && p->iLastCol!=iCol ){ - if( SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, 1)) - || SQLITE_OK!=(rc = fts3PendingListAppendVarint(&p, iCol)) - ){ - goto pendinglistappend_out; - } - p->iLastCol = iCol; - p->iLastPos = 0; - } - if( iCol>=0 ){ - assert( iPos>p->iLastPos || (iPos==0 && p->iLastPos==0) ); - rc = fts3PendingListAppendVarint(&p, 2+iPos-p->iLastPos); - if( rc==SQLITE_OK ){ - p->iLastPos = iPos; + if( pSeg->bLookup && res!=0 ){ + fts3SegReaderSetEof(pSeg); } } + fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); - pendinglistappend_out: - *pRc = rc; - if( p!=*pp ){ - *pp = p; - return 1; - } - return 0; + return SQLITE_OK; } -/* -** Free a PendingList object allocated by fts3PendingListAppend(). -*/ -static void fts3PendingListDelete(PendingList *pList){ - sqlite3_free(pList); +SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + Fts3SegFilter *pFilter /* Restrictions on range of iteration */ +){ + pCsr->pFilter = pFilter; + return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); } -/* -** Add an entry to one of the pending-terms hash tables. -*/ -static int fts3PendingTermsAddOne( - Fts3Table *p, - int iCol, - int iPos, - Fts3Hash *pHash, /* Pending terms hash table to add entry to */ - const char *zToken, - int nToken +SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr, /* Cursor object */ + int iCol, /* Column to match on. */ + const char *zTerm, /* Term to iterate through a doclist for */ + int nTerm /* Number of bytes in zTerm */ ){ - PendingList *pList; - int rc = SQLITE_OK; + int i; + int rc; + int nSegment = pCsr->nSegment; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); - pList = (PendingList *)fts3HashFind(pHash, zToken, nToken); - if( pList ){ - p->nPendingData -= (pList->nData + nToken + sizeof(Fts3HashElem)); - } - if( fts3PendingListAppend(&pList, p->iPrevDocid, iCol, iPos, &rc) ){ - if( pList==fts3HashInsert(pHash, zToken, nToken, pList) ){ - /* Malloc failed while inserting the new entry. This can only - ** happen if there was no previous entry for this token. - */ - assert( 0==fts3HashFind(pHash, zToken, nToken) ); - sqlite3_free(pList); - rc = SQLITE_NOMEM; + assert( pCsr->pFilter==0 ); + assert( zTerm && nTerm>0 ); + + /* Advance each segment iterator until it points to the term zTerm/nTerm. */ + rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); + if( rc!=SQLITE_OK ) return rc; + + /* Determine how many of the segments actually point to zTerm/nTerm. */ + for(i=0; i<nSegment; i++){ + Fts3SegReader *pSeg = pCsr->apSegment[i]; + if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ + break; } } - if( rc==SQLITE_OK ){ - p->nPendingData += (pList->nData + nToken + sizeof(Fts3HashElem)); + pCsr->nAdvance = i; + + /* Advance each of the segments to point to the first docid. */ + for(i=0; i<pCsr->nAdvance; i++){ + rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); + if( rc!=SQLITE_OK ) return rc; } - return rc; + fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); + + assert( iCol<0 || iCol<p->nColumn ); + pCsr->iColFilter = iCol; + + return SQLITE_OK; } /* -** Tokenize the nul-terminated string zText and add all tokens to the -** pending-terms hash-table. The docid used is that currently stored in -** p->iPrevDocid, and the column is specified by argument iCol. +** This function is called on a MultiSegReader that has been started using +** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also +** have been made. Calling this function puts the MultiSegReader in such +** a state that if the next two calls are: ** -** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +** sqlite3Fts3SegReaderStart() +** sqlite3Fts3SegReaderStep() +** +** then the entire doclist for the term is available in +** MultiSegReader.aDoclist/nDoclist. */ -static int fts3PendingTermsAdd( - Fts3Table *p, /* Table into which text will be inserted */ - int iLangid, /* Language id to use */ - const char *zText, /* Text of document to be inserted */ - int iCol, /* Column into which text is being inserted */ - u32 *pnWord /* IN/OUT: Incr. by number tokens inserted */ -){ - int rc; - int iStart = 0; - int iEnd = 0; - int iPos = 0; - int nWord = 0; +SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ + int i; /* Used to iterate through segment-readers */ - char const *zToken; - int nToken = 0; + assert( pCsr->zTerm==0 ); + assert( pCsr->nTerm==0 ); + assert( pCsr->aDoclist==0 ); + assert( pCsr->nDoclist==0 ); - sqlite3_tokenizer *pTokenizer = p->pTokenizer; - sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; - sqlite3_tokenizer_cursor *pCsr; - int (*xNext)(sqlite3_tokenizer_cursor *pCursor, - const char**,int*,int*,int*,int*); + pCsr->nAdvance = 0; + pCsr->bRestart = 1; + for(i=0; i<pCsr->nSegment; i++){ + pCsr->apSegment[i]->pOffsetList = 0; + pCsr->apSegment[i]->nOffsetList = 0; + pCsr->apSegment[i]->iDocid = 0; + } - assert( pTokenizer && pModule ); + return SQLITE_OK; +} - /* If the user has inserted a NULL value, this function may be called with - ** zText==0. In this case, add zero token entries to the hash table and - ** return early. */ - if( zText==0 ){ - *pnWord = 0; - return SQLITE_OK; - } - rc = sqlite3Fts3OpenTokenizer(pTokenizer, iLangid, zText, -1, &pCsr); - if( rc!=SQLITE_OK ){ - return rc; - } +SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( + Fts3Table *p, /* Virtual table handle */ + Fts3MultiSegReader *pCsr /* Cursor object */ +){ + int rc = SQLITE_OK; - xNext = pModule->xNext; - while( SQLITE_OK==rc - && SQLITE_OK==(rc = xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos)) - ){ + int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); + int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); + int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); + int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); + int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); + int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); + + Fts3SegReader **apSegment = pCsr->apSegment; + int nSegment = pCsr->nSegment; + Fts3SegFilter *pFilter = pCsr->pFilter; + int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( + p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp + ); + + if( pCsr->nSegment==0 ) return SQLITE_OK; + + do { + int nMerge; int i; - if( iPos>=nWord ) nWord = iPos+1; + + /* Advance the first pCsr->nAdvance entries in the apSegment[] array + ** forward. Then sort the list in order of current term again. + */ + for(i=0; i<pCsr->nAdvance; i++){ + Fts3SegReader *pSeg = apSegment[i]; + if( pSeg->bLookup ){ + fts3SegReaderSetEof(pSeg); + }else{ + rc = fts3SegReaderNext(p, pSeg, 0); + } + if( rc!=SQLITE_OK ) return rc; + } + fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); + pCsr->nAdvance = 0; - /* Positions cannot be negative; we use -1 as a terminator internally. - ** Tokens must have a non-zero length. + /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ + assert( rc==SQLITE_OK ); + if( apSegment[0]->aNode==0 ) break; + + pCsr->nTerm = apSegment[0]->nTerm; + pCsr->zTerm = apSegment[0]->zTerm; + + /* If this is a prefix-search, and if the term that apSegment[0] points + ** to does not share a suffix with pFilter->zTerm/nTerm, then all + ** required callbacks have been made. In this case exit early. + ** + ** Similarly, if this is a search for an exact match, and the first term + ** of segment apSegment[0] is not a match, exit early. */ - if( iPos<0 || !zToken || nToken<=0 ){ - rc = SQLITE_ERROR; - break; + if( pFilter->zTerm && !isScan ){ + if( pCsr->nTerm<pFilter->nTerm + || (!isPrefix && pCsr->nTerm>pFilter->nTerm) + || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) + ){ + break; + } } - /* Add the term to the terms index */ - rc = fts3PendingTermsAddOne( - p, iCol, iPos, &p->aIndex[0].hPending, zToken, nToken - ); - - /* Add the term to each of the prefix indexes that it is not too - ** short for. */ - for(i=1; rc==SQLITE_OK && i<p->nIndex; i++){ - struct Fts3Index *pIndex = &p->aIndex[i]; - if( nToken<pIndex->nPrefix ) continue; - rc = fts3PendingTermsAddOne( - p, iCol, iPos, &pIndex->hPending, zToken, pIndex->nPrefix - ); + nMerge = 1; + while( nMerge<nSegment + && apSegment[nMerge]->aNode + && apSegment[nMerge]->nTerm==pCsr->nTerm + && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) + ){ + nMerge++; } - } - pModule->xClose(pCsr); - *pnWord += nWord; - return (rc==SQLITE_DONE ? SQLITE_OK : rc); + assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); + if( nMerge==1 + && !isIgnoreEmpty + && !isFirst + && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) + ){ + pCsr->nDoclist = apSegment[0]->nDoclist; + if( fts3SegReaderIsPending(apSegment[0]) ){ + rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); + pCsr->aDoclist = pCsr->aBuffer; + }else{ + pCsr->aDoclist = apSegment[0]->aDoclist; + } + if( rc==SQLITE_OK ) rc = SQLITE_ROW; + }else{ + int nDoclist = 0; /* Size of doclist */ + sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ + + /* The current term of the first nMerge entries in the array + ** of Fts3SegReader objects is the same. The doclists must be merged + ** and a single term returned with the merged doclist. + */ + for(i=0; i<nMerge; i++){ + fts3SegReaderFirstDocid(p, apSegment[i]); + } + fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp); + while( apSegment[0]->pOffsetList ){ + int j; /* Number of segments that share a docid */ + char *pList = 0; + int nList = 0; + int nByte; + sqlite3_int64 iDocid = apSegment[0]->iDocid; + fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); + j = 1; + while( j<nMerge + && apSegment[j]->pOffsetList + && apSegment[j]->iDocid==iDocid + ){ + fts3SegReaderNextDocid(p, apSegment[j], 0, 0); + j++; + } + + if( isColFilter ){ + fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); + } + + if( !isIgnoreEmpty || nList>0 ){ + + /* Calculate the 'docid' delta value to write into the merged + ** doclist. */ + sqlite3_int64 iDelta; + if( p->bDescIdx && nDoclist>0 ){ + iDelta = iPrev - iDocid; + }else{ + iDelta = iDocid - iPrev; + } + assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); + assert( nDoclist>0 || iDelta==iDocid ); + + nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); + if( nDoclist+nByte>pCsr->nBuffer ){ + char *aNew; + pCsr->nBuffer = (nDoclist+nByte)*2; + aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); + if( !aNew ){ + return SQLITE_NOMEM; + } + pCsr->aBuffer = aNew; + } + + if( isFirst ){ + char *a = &pCsr->aBuffer[nDoclist]; + int nWrite; + + nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); + if( nWrite ){ + iPrev = iDocid; + nDoclist += nWrite; + } + }else{ + nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); + iPrev = iDocid; + if( isRequirePos ){ + memcpy(&pCsr->aBuffer[nDoclist], pList, nList); + nDoclist += nList; + pCsr->aBuffer[nDoclist++] = '\0'; + } + } + } + + fts3SegReaderSort(apSegment, nMerge, j, xCmp); + } + if( nDoclist>0 ){ + pCsr->aDoclist = pCsr->aBuffer; + pCsr->nDoclist = nDoclist; + rc = SQLITE_ROW; + } + } + pCsr->nAdvance = nMerge; + }while( rc==SQLITE_OK ); + + return rc; } -/* -** Calling this function indicates that subsequent calls to -** fts3PendingTermsAdd() are to add term/position-list pairs for the -** contents of the document with docid iDocid. -*/ -static int fts3PendingTermsDocid( - Fts3Table *p, /* Full-text table handle */ - int iLangid, /* Language id of row being written */ - sqlite_int64 iDocid /* Docid of row being written */ + +SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( + Fts3MultiSegReader *pCsr /* Cursor object */ ){ - assert( iLangid>=0 ); + if( pCsr ){ + int i; + for(i=0; i<pCsr->nSegment; i++){ + sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); + } + sqlite3_free(pCsr->apSegment); + sqlite3_free(pCsr->aBuffer); - /* TODO(shess) Explore whether partially flushing the buffer on - ** forced-flush would provide better performance. I suspect that if - ** we ordered the doclists by size and flushed the largest until the - ** buffer was half empty, that would let the less frequent terms - ** generate longer doclists. - */ - if( iDocid<=p->iPrevDocid - || p->iPrevLangid!=iLangid - || p->nPendingData>p->nMaxPendingData - ){ - int rc = sqlite3Fts3PendingTermsFlush(p); - if( rc!=SQLITE_OK ) return rc; + pCsr->nSegment = 0; + pCsr->apSegment = 0; + pCsr->aBuffer = 0; } - p->iPrevDocid = iDocid; - p->iPrevLangid = iLangid; - return SQLITE_OK; } /* -** Discard the contents of the pending-terms hash tables. +** Decode the "end_block" field, selected by column iCol of the SELECT +** statement passed as the first argument. +** +** The "end_block" field may contain either an integer, or a text field +** containing the text representation of two non-negative integers separated +** by one or more space (0x20) characters. In the first case, set *piEndBlock +** to the integer value and *pnByte to zero before returning. In the second, +** set *piEndBlock to the first value and *pnByte to the second. */ -SQLITE_PRIVATE void sqlite3Fts3PendingTermsClear(Fts3Table *p){ - int i; - for(i=0; i<p->nIndex; i++){ - Fts3HashElem *pElem; - Fts3Hash *pHash = &p->aIndex[i].hPending; - for(pElem=fts3HashFirst(pHash); pElem; pElem=fts3HashNext(pElem)){ - PendingList *pList = (PendingList *)fts3HashData(pElem); - fts3PendingListDelete(pList); +static void fts3ReadEndBlockField( + sqlite3_stmt *pStmt, + int iCol, + i64 *piEndBlock, + i64 *pnByte +){ + const unsigned char *zText = sqlite3_column_text(pStmt, iCol); + if( zText ){ + int i; + int iMul = 1; + i64 iVal = 0; + for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); } - fts3HashClear(pHash); + *piEndBlock = iVal; + while( zText[i]==' ' ) i++; + iVal = 0; + if( zText[i]=='-' ){ + i++; + iMul = -1; + } + for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ + iVal = iVal*10 + (zText[i] - '0'); + } + *pnByte = (iVal * (i64)iMul); } - p->nPendingData = 0; } + /* -** This function is called by the xUpdate() method as part of an INSERT -** operation. It adds entries for each term in the new record to the -** pendingTerms hash table. -** -** Argument apVal is the same as the similarly named argument passed to -** fts3InsertData(). Parameter iDocid is the docid of the new row. +** A segment of size nByte bytes has just been written to absolute level +** iAbsLevel. Promote any segments that should be promoted as a result. */ -static int fts3InsertTerms( - Fts3Table *p, - int iLangid, - sqlite3_value **apVal, - u32 *aSz +static int fts3PromoteSegments( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level just updated */ + sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ ){ - int i; /* Iterator variable */ - for(i=2; i<p->nColumn+2; i++){ - int iCol = i-2; - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_value_text(apVal[i]); - int rc = fts3PendingTermsAdd(p, iLangid, zText, iCol, &aSz[iCol]); - if( rc!=SQLITE_OK ){ - return rc; + int rc = SQLITE_OK; + sqlite3_stmt *pRange; + + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); + + if( rc==SQLITE_OK ){ + int bOk = 0; + i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; + i64 nLimit = (nByte*3)/2; + + /* Loop through all entries in the %_segdir table corresponding to + ** segments in this index on levels greater than iAbsLevel. If there is + ** at least one such segment, and it is possible to determine that all + ** such segments are smaller than nLimit bytes in size, they will be + ** promoted to level iAbsLevel. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel+1); + sqlite3_bind_int64(pRange, 2, iLast); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + i64 nSize = 0, dummy; + fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); + if( nSize<=0 || nSize>nLimit ){ + /* If nSize==0, then the %_segdir.end_block field does not not + ** contain a size value. This happens if it was written by an + ** old version of FTS. In this case it is not possible to determine + ** the size of the segment, and so segment promotion does not + ** take place. */ + bOk = 0; + break; + } + bOk = 1; + } + rc = sqlite3_reset(pRange); + + if( bOk ){ + int iIdx = 0; + sqlite3_stmt *pUpdate1 = 0; + sqlite3_stmt *pUpdate2 = 0; + + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); + } + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); + } + + if( rc==SQLITE_OK ){ + + /* Loop through all %_segdir entries for segments in this index with + ** levels equal to or greater than iAbsLevel. As each entry is visited, + ** updated it to set (level = -1) and (idx = N), where N is 0 for the + ** oldest segment in the range, 1 for the next oldest, and so on. + ** + ** In other words, move all segments being promoted to level -1, + ** setting the "idx" fields as appropriate to keep them in the same + ** order. The contents of level -1 (which is never used, except + ** transiently here), will be moved back to level iAbsLevel below. */ + sqlite3_bind_int64(pRange, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pRange) ){ + sqlite3_bind_int(pUpdate1, 1, iIdx++); + sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); + sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); + sqlite3_step(pUpdate1); + rc = sqlite3_reset(pUpdate1); + if( rc!=SQLITE_OK ){ + sqlite3_reset(pRange); + break; + } + } + } + if( rc==SQLITE_OK ){ + rc = sqlite3_reset(pRange); + } + + /* Move level -1 to level iAbsLevel */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); + sqlite3_step(pUpdate2); + rc = sqlite3_reset(pUpdate2); } - aSz[p->nColumn] += sqlite3_value_bytes(apVal[i]); } } - return SQLITE_OK; + + + return rc; } /* -** This function is called by the xUpdate() method for an INSERT operation. -** The apVal parameter is passed a copy of the apVal argument passed by -** SQLite to the xUpdate() method. i.e: +** Merge all level iLevel segments in the database into a single +** iLevel+1 segment. Or, if iLevel<0, merge all segments into a +** single segment with a level equal to the numerically largest level +** currently present in the database. ** -** apVal[0] Not used for INSERT. -** apVal[1] rowid -** apVal[2] Left-most user-defined column -** ... -** apVal[p->nColumn+1] Right-most user-defined column -** apVal[p->nColumn+2] Hidden column with same name as table -** apVal[p->nColumn+3] Hidden "docid" column (alias for rowid) -** apVal[p->nColumn+4] Hidden languageid column +** If this function is called with iLevel<0, but there is only one +** segment in the database, SQLITE_DONE is returned immediately. +** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, +** an SQLite error code is returned. */ -static int fts3InsertData( - Fts3Table *p, /* Full-text table */ - sqlite3_value **apVal, /* Array of values to insert */ - sqlite3_int64 *piDocid /* OUT: Docid for row just inserted */ +static int fts3SegmentMerge( + Fts3Table *p, + int iLangid, /* Language id to merge */ + int iIndex, /* Index in p->aIndex[] to merge */ + int iLevel /* Level to merge */ ){ int rc; /* Return code */ - sqlite3_stmt *pContentInsert; /* INSERT INTO %_content VALUES(...) */ + int iIdx = 0; /* Index of new segment */ + sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ + SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ + Fts3SegFilter filter; /* Segment term filter condition */ + Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ + int bIgnoreEmpty = 0; /* True to ignore empty segments */ + i64 iMaxLevel = 0; /* Max level number for this index/langid */ - if( p->zContentTbl ){ - sqlite3_value *pRowid = apVal[p->nColumn+3]; - if( sqlite3_value_type(pRowid)==SQLITE_NULL ){ - pRowid = apVal[1]; - } - if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){ - return SQLITE_CONSTRAINT; + assert( iLevel==FTS3_SEGCURSOR_ALL + || iLevel==FTS3_SEGCURSOR_PENDING + || iLevel>=0 + ); + assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); + assert( iIndex>=0 && iIndex<p->nIndex ); + + rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); + if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); + if( rc!=SQLITE_OK ) goto finished; + } + + if( iLevel==FTS3_SEGCURSOR_ALL ){ + /* This call is to merge all segments in the database to a single + ** segment. The level of the new segment is equal to the numerically + ** greatest segment level currently present in the database for this + ** index. The idx of the new segment is always 0. */ + if( csr.nSegment==1 ){ + rc = SQLITE_DONE; + goto finished; } - *piDocid = sqlite3_value_int64(pRowid); - return SQLITE_OK; + iNewLevel = iMaxLevel; + bIgnoreEmpty = 1; + + }else{ + /* This call is to merge all segments at level iLevel. find the next + ** available segment index at level iLevel+1. The call to + ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to + ** a single iLevel+2 segment if necessary. */ + assert( FTS3_SEGCURSOR_PENDING==-1 ); + iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); + rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); + bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); } + if( rc!=SQLITE_OK ) goto finished; - /* Locate the statement handle used to insert data into the %_content - ** table. The SQL for this statement is: - ** - ** INSERT INTO %_content VALUES(?, ?, ?, ...) - ** - ** The statement features N '?' variables, where N is the number of user - ** defined columns in the FTS3 table, plus one for the docid field. - */ - rc = fts3SqlStmt(p, SQL_CONTENT_INSERT, &pContentInsert, &apVal[1]); - if( rc==SQLITE_OK && p->zLanguageid ){ - rc = sqlite3_bind_int( - pContentInsert, p->nColumn+2, - sqlite3_value_int(apVal[p->nColumn+4]) - ); + assert( csr.nSegment>0 ); + assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); + assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) ); + + memset(&filter, 0, sizeof(Fts3SegFilter)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS; + filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); + + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + while( SQLITE_OK==rc ){ + rc = sqlite3Fts3SegReaderStep(p, &csr); + if( rc!=SQLITE_ROW ) break; + rc = fts3SegWriterAdd(p, &pWriter, 1, + csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); } - if( rc!=SQLITE_OK ) return rc; + if( rc!=SQLITE_OK ) goto finished; + assert( pWriter || bIgnoreEmpty ); - /* There is a quirk here. The users INSERT statement may have specified - ** a value for the "rowid" field, for the "docid" field, or for both. - ** Which is a problem, since "rowid" and "docid" are aliases for the - ** same value. For example: - ** - ** INSERT INTO fts3tbl(rowid, docid) VALUES(1, 2); - ** - ** In FTS3, this is an error. It is an error to specify non-NULL values - ** for both docid and some other rowid alias. - */ - if( SQLITE_NULL!=sqlite3_value_type(apVal[3+p->nColumn]) ){ - if( SQLITE_NULL==sqlite3_value_type(apVal[0]) - && SQLITE_NULL!=sqlite3_value_type(apVal[1]) - ){ - /* A rowid/docid conflict. */ - return SQLITE_ERROR; + if( iLevel!=FTS3_SEGCURSOR_PENDING ){ + rc = fts3DeleteSegdir( + p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment + ); + if( rc!=SQLITE_OK ) goto finished; + } + if( pWriter ){ + rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); + if( rc==SQLITE_OK ){ + if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){ + rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); + } } - rc = sqlite3_bind_value(pContentInsert, 1, apVal[3+p->nColumn]); - if( rc!=SQLITE_OK ) return rc; } - /* Execute the statement to insert the record. Set *piDocid to the - ** new docid value. - */ - sqlite3_step(pContentInsert); - rc = sqlite3_reset(pContentInsert); - - *piDocid = sqlite3_last_insert_rowid(p->db); + finished: + fts3SegWriterFree(pWriter); + sqlite3Fts3SegReaderFinish(&csr); return rc; } - -/* -** Remove all data from the FTS3 table. Clear the hash table containing -** pending terms. +/* +** Flush the contents of pendingTerms to level 0 segments. */ -static int fts3DeleteAll(Fts3Table *p, int bContent){ - int rc = SQLITE_OK; /* Return code */ - - /* Discard the contents of the pending-terms hash table. */ +SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ + rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } sqlite3Fts3PendingTermsClear(p); - /* Delete everything from the shadow tables. Except, leave %_content as - ** is if bContent is false. */ - assert( p->zContentTbl==0 || bContent==0 ); - if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0); - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0); - fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0); - if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_DOCSIZE, 0); - } - if( p->bHasStat ){ - fts3SqlExec(&rc, p, SQL_DELETE_ALL_STAT, 0); + /* Determine the auto-incr-merge setting if unknown. If enabled, + ** estimate the number of leaf blocks of content to be written + */ + if( rc==SQLITE_OK && p->bHasStat + && p->nAutoincrmerge==0xff && p->nLeafAdd>0 + ){ + sqlite3_stmt *pStmt = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + rc = sqlite3_step(pStmt); + if( rc==SQLITE_ROW ){ + p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); + if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; + }else if( rc==SQLITE_DONE ){ + p->nAutoincrmerge = 0; + } + rc = sqlite3_reset(pStmt); + } } return rc; } /* -** +** Encode N integers as varints into a blob. */ -static int langidFromSelect(Fts3Table *p, sqlite3_stmt *pSelect){ - int iLangid = 0; - if( p->zLanguageid ) iLangid = sqlite3_column_int(pSelect, p->nColumn+1); - return iLangid; +static void fts3EncodeIntArray( + int N, /* The number of integers to encode */ + u32 *a, /* The integer values */ + char *zBuf, /* Write the BLOB here */ + int *pNBuf /* Write number of bytes if zBuf[] used here */ +){ + int i, j; + for(i=j=0; i<N; i++){ + j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]); + } + *pNBuf = j; } /* -** The first element in the apVal[] array is assumed to contain the docid -** (an integer) of a row about to be deleted. Remove all terms from the -** full-text index. +** Decode a blob of varints into N integers */ -static void fts3DeleteTerms( - int *pRC, /* Result code */ - Fts3Table *p, /* The FTS table to delete from */ - sqlite3_value *pRowid, /* The docid to be deleted */ - u32 *aSz, /* Sizes of deleted document written here */ - int *pbFound /* OUT: Set to true if row really does exist */ +static void fts3DecodeIntArray( + int N, /* The number of integers to decode */ + u32 *a, /* Write the integer values */ + const char *zBuf, /* The BLOB containing the varints */ + int nBuf /* size of the BLOB */ ){ - int rc; - sqlite3_stmt *pSelect; - - assert( *pbFound==0 ); - if( *pRC ) return; - rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pSelect, &pRowid); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pSelect) ){ - int i; - int iLangid = langidFromSelect(p, pSelect); - rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pSelect, 0)); - for(i=1; rc==SQLITE_OK && i<=p->nColumn; i++){ - int iCol = i-1; - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pSelect, i); - rc = fts3PendingTermsAdd(p, iLangid, zText, -1, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pSelect, i); - } - } - if( rc!=SQLITE_OK ){ - sqlite3_reset(pSelect); - *pRC = rc; - return; - } - *pbFound = 1; - } - rc = sqlite3_reset(pSelect); - }else{ - sqlite3_reset(pSelect); + int i, j; + UNUSED_PARAMETER(nBuf); + for(i=j=0; i<N; i++){ + sqlite3_int64 x; + j += sqlite3Fts3GetVarint(&zBuf[j], &x); + assert(j<=nBuf); + a[i] = (u32)(x & 0xffffffff); } - *pRC = rc; } /* -** Forward declaration to account for the circular dependency between -** functions fts3SegmentMerge() and fts3AllocateSegdirIdx(). +** Insert the sizes (in tokens) for each column of the document +** with docid equal to p->iPrevDocid. The sizes are encoded as +** a blob of varints. */ -static int fts3SegmentMerge(Fts3Table *, int, int, int); +static void fts3InsertDocsize( + int *pRC, /* Result code */ + Fts3Table *p, /* Table into which to insert */ + u32 *aSz /* Sizes of each column, in tokens */ +){ + char *pBlob; /* The BLOB encoding of the document size */ + int nBlob; /* Number of bytes in the BLOB */ + sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ + int rc; /* Result code from subfunctions */ -/* -** This function allocates a new level iLevel index in the segdir table. -** Usually, indexes are allocated within a level sequentially starting -** with 0, so the allocated index is one greater than the value returned -** by: + if( *pRC ) return; + pBlob = sqlite3_malloc( 10*p->nColumn ); + if( pBlob==0 ){ + *pRC = SQLITE_NOMEM; + return; + } + fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); + if( rc ){ + sqlite3_free(pBlob); + *pRC = rc; + return; + } + sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); +} + +/* +** Record 0 of the %_stat table contains a blob consisting of N varints, +** where N is the number of user defined columns in the fts3 table plus +** two. If nCol is the number of user defined columns, then values of the +** varints are set as follows: ** -** SELECT max(idx) FROM %_segdir WHERE level = :iLevel +** Varint 0: Total number of rows in the table. ** -** However, if there are already FTS3_MERGE_COUNT indexes at the requested -** level, they are merged into a single level (iLevel+1) segment and the -** allocated index is 0. +** Varint 1..nCol: For each column, the total number of tokens stored in +** the column for all rows of the table. +** +** Varint 1+nCol: The total size, in bytes, of all text values in all +** columns of all rows of the table. ** -** If successful, *piIdx is set to the allocated index slot and SQLITE_OK -** returned. Otherwise, an SQLite error code is returned. */ -static int fts3AllocateSegdirIdx( - Fts3Table *p, - int iLangid, /* Language id */ - int iIndex, /* Index for p->aIndex */ - int iLevel, - int *piIdx +static void fts3UpdateDocTotals( + int *pRC, /* The result code */ + Fts3Table *p, /* Table being updated */ + u32 *aSzIns, /* Size increases */ + u32 *aSzDel, /* Size decreases */ + int nChng /* Change in the number of documents */ ){ - int rc; /* Return Code */ - sqlite3_stmt *pNextIdx; /* Query for next idx at level iLevel */ - int iNext = 0; /* Result of query pNextIdx */ + char *pBlob; /* Storage for BLOB written into %_stat */ + int nBlob; /* Size of BLOB written into %_stat */ + u32 *a; /* Array of integers that becomes the BLOB */ + sqlite3_stmt *pStmt; /* Statement for reading and writing */ + int i; /* Loop counter */ + int rc; /* Result code from subfunctions */ - assert( iLangid>=0 ); - assert( p->nIndex>=1 ); + const int nStat = p->nColumn+2; - /* Set variable iNext to the next available segdir index at level iLevel. */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pNextIdx, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64( - pNextIdx, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) - ); - if( SQLITE_ROW==sqlite3_step(pNextIdx) ){ - iNext = sqlite3_column_int(pNextIdx, 0); + if( *pRC ) return; + a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); + if( a==0 ){ + *pRC = SQLITE_NOMEM; + return; + } + pBlob = (char*)&a[nStat]; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; + } + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + if( sqlite3_step(pStmt)==SQLITE_ROW ){ + fts3DecodeIntArray(nStat, a, + sqlite3_column_blob(pStmt, 0), + sqlite3_column_bytes(pStmt, 0)); + }else{ + memset(a, 0, sizeof(u32)*(nStat) ); + } + rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + sqlite3_free(a); + *pRC = rc; + return; + } + if( nChng<0 && a[0]<(u32)(-nChng) ){ + a[0] = 0; + }else{ + a[0] += nChng; + } + for(i=0; i<p->nColumn+1; i++){ + u32 x = a[i+1]; + if( x+aSzIns[i] < aSzDel[i] ){ + x = 0; + }else{ + x = x + aSzIns[i] - aSzDel[i]; } - rc = sqlite3_reset(pNextIdx); + a[i+1] = x; + } + fts3EncodeIntArray(nStat, a, pBlob, &nBlob); + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ){ + sqlite3_free(a); + *pRC = rc; + return; } + sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); + sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); + sqlite3_step(pStmt); + *pRC = sqlite3_reset(pStmt); + sqlite3_free(a); +} +/* +** Merge the entire database so that there is one segment for each +** iIndex/iLangid combination. +*/ +static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ + int bSeenDone = 0; + int rc; + sqlite3_stmt *pAllLangid = 0; + + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); if( rc==SQLITE_OK ){ - /* If iNext is FTS3_MERGE_COUNT, indicating that level iLevel is already - ** full, merge all segments in level iLevel into a single iLevel+1 - ** segment and allocate (newly freed) index 0 at level iLevel. Otherwise, - ** if iNext is less than FTS3_MERGE_COUNT, allocate index iNext. - */ - if( iNext>=FTS3_MERGE_COUNT ){ - fts3LogMerge(16, getAbsoluteLevel(p, iLangid, iIndex, iLevel)); - rc = fts3SegmentMerge(p, iLangid, iIndex, iLevel); - *piIdx = 0; - }else{ - *piIdx = iNext; + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int i; + int iLangid = sqlite3_column_int(pAllLangid, 0); + for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ + rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); + if( rc==SQLITE_DONE ){ + bSeenDone = 1; + rc = SQLITE_OK; + } + } } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; } - return rc; + sqlite3Fts3SegmentsClose(p); + sqlite3Fts3PendingTermsClear(p); + + return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; } /* -** The %_segments table is declared as follows: -** -** CREATE TABLE %_segments(blockid INTEGER PRIMARY KEY, block BLOB) -** -** This function reads data from a single row of the %_segments table. The -** specific row is identified by the iBlockid parameter. If paBlob is not -** NULL, then a buffer is allocated using sqlite3_malloc() and populated -** with the contents of the blob stored in the "block" column of the -** identified table row is. Whether or not paBlob is NULL, *pnBlob is set -** to the size of the blob in bytes before returning. +** This function is called when the user executes the following statement: ** -** If an error occurs, or the table does not contain the specified row, -** an SQLite error code is returned. Otherwise, SQLITE_OK is returned. If -** paBlob is non-NULL, then it is the responsibility of the caller to -** eventually free the returned buffer. +** INSERT INTO <tbl>(<tbl>) VALUES('rebuild'); ** -** This function may leave an open sqlite3_blob* handle in the -** Fts3Table.pSegments variable. This handle is reused by subsequent calls -** to this function. The handle may be closed by calling the -** sqlite3Fts3SegmentsClose() function. Reusing a blob handle is a handy -** performance improvement, but the blob handle should always be closed -** before control is returned to the user (to prevent a lock being held -** on the database file for longer than necessary). Thus, any virtual table -** method (xFilter etc.) that may directly or indirectly call this function -** must call sqlite3Fts3SegmentsClose() before returning. +** The entire FTS index is discarded and rebuilt. If the table is one +** created using the content=xxx option, then the new index is based on +** the current contents of the xxx table. Otherwise, it is rebuilt based +** on the contents of the %_content table. */ -SQLITE_PRIVATE int sqlite3Fts3ReadBlock( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iBlockid, /* Access the row with blockid=$iBlockid */ - char **paBlob, /* OUT: Blob data in malloc'd buffer */ - int *pnBlob, /* OUT: Size of blob data */ - int *pnLoad /* OUT: Bytes actually loaded */ -){ - int rc; /* Return code */ +static int fts3DoRebuild(Fts3Table *p){ + int rc; /* Return Code */ - /* pnBlob must be non-NULL. paBlob may be NULL or non-NULL. */ - assert( pnBlob ); + rc = fts3DeleteAll(p, 0); + if( rc==SQLITE_OK ){ + u32 *aSz = 0; + u32 *aSzIns = 0; + u32 *aSzDel = 0; + sqlite3_stmt *pStmt = 0; + int nEntry = 0; - if( p->pSegments ){ - rc = sqlite3_blob_reopen(p->pSegments, iBlockid); - }else{ - if( 0==p->zSegmentsTbl ){ - p->zSegmentsTbl = sqlite3_mprintf("%s_segments", p->zName); - if( 0==p->zSegmentsTbl ) return SQLITE_NOMEM; + /* Compose and prepare an SQL statement to loop through the content table */ + char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); } - rc = sqlite3_blob_open( - p->db, p->zDb, p->zSegmentsTbl, "block", iBlockid, 0, &p->pSegments - ); - } - if( rc==SQLITE_OK ){ - int nByte = sqlite3_blob_bytes(p->pSegments); - *pnBlob = nByte; - if( paBlob ){ - char *aByte = sqlite3_malloc(nByte + FTS3_NODE_PADDING); - if( !aByte ){ + if( rc==SQLITE_OK ){ + int nByte = sizeof(u32) * (p->nColumn+1)*3; + aSz = (u32 *)sqlite3_malloc(nByte); + if( aSz==0 ){ rc = SQLITE_NOMEM; }else{ - if( pnLoad && nByte>(FTS3_NODE_CHUNK_THRESHOLD) ){ - nByte = FTS3_NODE_CHUNKSIZE; - *pnLoad = nByte; + memset(aSz, 0, nByte); + aSzIns = &aSz[p->nColumn+1]; + aSzDel = &aSzIns[p->nColumn+1]; + } + } + + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + int iCol; + int iLangid = langidFromSelect(p, pStmt); + rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); + memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); + for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); + rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); + aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); } - rc = sqlite3_blob_read(p->pSegments, aByte, nByte, 0); - memset(&aByte[nByte], 0, FTS3_NODE_PADDING); - if( rc!=SQLITE_OK ){ - sqlite3_free(aByte); - aByte = 0; + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSz); + } + if( rc!=SQLITE_OK ){ + sqlite3_finalize(pStmt); + pStmt = 0; + }else{ + nEntry++; + for(iCol=0; iCol<=p->nColumn; iCol++){ + aSzIns[iCol] += aSz[iCol]; } } - *paBlob = aByte; + } + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); + } + sqlite3_free(aSz); + + if( pStmt ){ + int rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ){ + rc = rc2; + } } } return rc; } + /* -** Close the blob handle at p->pSegments, if it is open. See comments above -** the sqlite3Fts3ReadBlock() function for details. +** This function opens a cursor used to read the input data for an +** incremental merge operation. Specifically, it opens a cursor to scan +** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute +** level iAbsLevel. */ -SQLITE_PRIVATE void sqlite3Fts3SegmentsClose(Fts3Table *p){ - sqlite3_blob_close(p->pSegments); - p->pSegments = 0; -} - -static int fts3SegReaderIncrRead(Fts3SegReader *pReader){ - int nRead; /* Number of bytes to read */ - int rc; /* Return code */ +static int fts3IncrmergeCsr( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to open */ + int nSeg, /* Number of segments to merge */ + Fts3MultiSegReader *pCsr /* Cursor object to populate */ +){ + int rc; /* Return Code */ + sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ + int nByte; /* Bytes allocated at pCsr->apSegment[] */ - nRead = MIN(pReader->nNode - pReader->nPopulate, FTS3_NODE_CHUNKSIZE); - rc = sqlite3_blob_read( - pReader->pBlob, - &pReader->aNode[pReader->nPopulate], - nRead, - pReader->nPopulate - ); + /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ + memset(pCsr, 0, sizeof(*pCsr)); + nByte = sizeof(Fts3SegReader *) * nSeg; + pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + if( pCsr->apSegment==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pCsr->apSegment, 0, nByte); + rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); + } if( rc==SQLITE_OK ){ - pReader->nPopulate += nRead; - memset(&pReader->aNode[pReader->nPopulate], 0, FTS3_NODE_PADDING); - if( pReader->nPopulate==pReader->nNode ){ - sqlite3_blob_close(pReader->pBlob); - pReader->pBlob = 0; - pReader->nPopulate = 0; + int i; + int rc2; + sqlite3_bind_int64(pStmt, 1, iAbsLevel); + assert( pCsr->nSegment==0 ); + for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){ + rc = sqlite3Fts3SegReaderNew(i, 0, + sqlite3_column_int64(pStmt, 1), /* segdir.start_block */ + sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */ + sqlite3_column_int64(pStmt, 3), /* segdir.end_block */ + sqlite3_column_blob(pStmt, 4), /* segdir.root */ + sqlite3_column_bytes(pStmt, 4), /* segdir.root */ + &pCsr->apSegment[i] + ); + pCsr->nSegment++; } + rc2 = sqlite3_reset(pStmt); + if( rc==SQLITE_OK ) rc = rc2; } - return rc; -} -static int fts3SegReaderRequire(Fts3SegReader *pReader, char *pFrom, int nByte){ - int rc = SQLITE_OK; - assert( !pReader->pBlob - || (pFrom>=pReader->aNode && pFrom<&pReader->aNode[pReader->nNode]) - ); - while( pReader->pBlob && rc==SQLITE_OK - && (pFrom - pReader->aNode + nByte)>pReader->nPopulate - ){ - rc = fts3SegReaderIncrRead(pReader); - } return rc; } +typedef struct IncrmergeWriter IncrmergeWriter; +typedef struct NodeWriter NodeWriter; +typedef struct Blob Blob; +typedef struct NodeReader NodeReader; + /* -** Set an Fts3SegReader cursor to point at EOF. +** An instance of the following structure is used as a dynamic buffer +** to build up nodes or other blobs of data in. +** +** The function blobGrowBuffer() is used to extend the allocation. */ -static void fts3SegReaderSetEof(Fts3SegReader *pSeg){ - if( !fts3SegReaderIsRootOnly(pSeg) ){ - sqlite3_free(pSeg->aNode); - sqlite3_blob_close(pSeg->pBlob); - pSeg->pBlob = 0; - } - pSeg->aNode = 0; -} +struct Blob { + char *a; /* Pointer to allocation */ + int n; /* Number of valid bytes of data in a[] */ + int nAlloc; /* Allocated size of a[] (nAlloc>=n) */ +}; /* -** Move the iterator passed as the first argument to the next term in the -** segment. If successful, SQLITE_OK is returned. If there is no next term, -** SQLITE_DONE. Otherwise, an SQLite error code. +** This structure is used to build up buffers containing segment b-tree +** nodes (blocks). */ -static int fts3SegReaderNext( - Fts3Table *p, - Fts3SegReader *pReader, - int bIncr -){ - int rc; /* Return code of various sub-routines */ - char *pNext; /* Cursor variable */ - int nPrefix; /* Number of bytes in term prefix */ - int nSuffix; /* Number of bytes in term suffix */ - - if( !pReader->aDoclist ){ - pNext = pReader->aNode; - }else{ - pNext = &pReader->aDoclist[pReader->nDoclist]; - } - - if( !pNext || pNext>=&pReader->aNode[pReader->nNode] ){ +struct NodeWriter { + sqlite3_int64 iBlock; /* Current block id */ + Blob key; /* Last key written to the current block */ + Blob block; /* Current block image */ +}; - if( fts3SegReaderIsPending(pReader) ){ - Fts3HashElem *pElem = *(pReader->ppNextElem); - if( pElem==0 ){ - pReader->aNode = 0; - }else{ - PendingList *pList = (PendingList *)fts3HashData(pElem); - pReader->zTerm = (char *)fts3HashKey(pElem); - pReader->nTerm = fts3HashKeysize(pElem); - pReader->nNode = pReader->nDoclist = pList->nData + 1; - pReader->aNode = pReader->aDoclist = pList->aData; - pReader->ppNextElem++; - assert( pReader->aNode ); - } - return SQLITE_OK; - } +/* +** An object of this type contains the state required to create or append +** to an appendable b-tree segment. +*/ +struct IncrmergeWriter { + int nLeafEst; /* Space allocated for leaf blocks */ + int nWork; /* Number of leaf pages flushed */ + sqlite3_int64 iAbsLevel; /* Absolute level of input segments */ + int iIdx; /* Index of *output* segment in iAbsLevel+1 */ + sqlite3_int64 iStart; /* Block number of first allocated block */ + sqlite3_int64 iEnd; /* Block number of last allocated block */ + sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ + u8 bNoLeafData; /* If true, store 0 for segment size */ + NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; +}; - fts3SegReaderSetEof(pReader); +/* +** An object of the following type is used to read data from a single +** FTS segment node. See the following functions: +** +** nodeReaderInit() +** nodeReaderNext() +** nodeReaderRelease() +*/ +struct NodeReader { + const char *aNode; + int nNode; + int iOff; /* Current offset within aNode[] */ - /* If iCurrentBlock>=iLeafEndBlock, this is an EOF condition. All leaf - ** blocks have already been traversed. */ - assert( pReader->iCurrentBlock<=pReader->iLeafEndBlock ); - if( pReader->iCurrentBlock>=pReader->iLeafEndBlock ){ - return SQLITE_OK; - } + /* Output variables. Containing the current node entry. */ + sqlite3_int64 iChild; /* Pointer to child node */ + Blob term; /* Current term */ + const char *aDoclist; /* Pointer to doclist */ + int nDoclist; /* Size of doclist in bytes */ +}; - rc = sqlite3Fts3ReadBlock( - p, ++pReader->iCurrentBlock, &pReader->aNode, &pReader->nNode, - (bIncr ? &pReader->nPopulate : 0) - ); - if( rc!=SQLITE_OK ) return rc; - assert( pReader->pBlob==0 ); - if( bIncr && pReader->nPopulate<pReader->nNode ){ - pReader->pBlob = p->pSegments; - p->pSegments = 0; +/* +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, if the allocation at pBlob->a is not already at least nMin +** bytes in size, extend (realloc) it to be so. +** +** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a +** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc +** to reflect the new size of the pBlob->a[] buffer. +*/ +static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ + if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ + int nAlloc = nMin; + char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); + if( a ){ + pBlob->nAlloc = nAlloc; + pBlob->a = a; + }else{ + *pRc = SQLITE_NOMEM; } - pNext = pReader->aNode; } +} - assert( !fts3SegReaderIsPending(pReader) ); +/* +** Attempt to advance the node-reader object passed as the first argument to +** the next entry on the node. +** +** Return an error code if an error occurs (SQLITE_NOMEM is possible). +** Otherwise return SQLITE_OK. If there is no next entry on the node +** (e.g. because the current entry is the last) set NodeReader->aNode to +** NULL to indicate EOF. Otherwise, populate the NodeReader structure output +** variables for the new entry. +*/ +static int nodeReaderNext(NodeReader *p){ + int bFirst = (p->term.n==0); /* True for first term on the node */ + int nPrefix = 0; /* Bytes to copy from previous term */ + int nSuffix = 0; /* Bytes to append to the prefix */ + int rc = SQLITE_OK; /* Return code */ - rc = fts3SegReaderRequire(pReader, pNext, FTS3_VARINT_MAX*2); - if( rc!=SQLITE_OK ) return rc; - - /* Because of the FTS3_NODE_PADDING bytes of padding, the following is - ** safe (no risk of overread) even if the node data is corrupted. */ - pNext += fts3GetVarint32(pNext, &nPrefix); - pNext += fts3GetVarint32(pNext, &nSuffix); - if( nPrefix<0 || nSuffix<=0 - || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] - ){ - return FTS_CORRUPT_VTAB; - } + assert( p->aNode ); + if( p->iChild && bFirst==0 ) p->iChild++; + if( p->iOff>=p->nNode ){ + /* EOF */ + p->aNode = 0; + }else{ + if( bFirst==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + } + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); - if( nPrefix+nSuffix>pReader->nTermAlloc ){ - int nNew = (nPrefix+nSuffix)*2; - char *zNew = sqlite3_realloc(pReader->zTerm, nNew); - if( !zNew ){ - return SQLITE_NOMEM; + blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); + if( rc==SQLITE_OK ){ + memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); + p->term.n = nPrefix+nSuffix; + p->iOff += nSuffix; + if( p->iChild==0 ){ + p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); + p->aDoclist = &p->aNode[p->iOff]; + p->iOff += p->nDoclist; + } } - pReader->zTerm = zNew; - pReader->nTermAlloc = nNew; } - rc = fts3SegReaderRequire(pReader, pNext, nSuffix+FTS3_VARINT_MAX); - if( rc!=SQLITE_OK ) return rc; + assert( p->iOff<=p->nNode ); - memcpy(&pReader->zTerm[nPrefix], pNext, nSuffix); - pReader->nTerm = nPrefix+nSuffix; - pNext += nSuffix; - pNext += fts3GetVarint32(pNext, &pReader->nDoclist); - pReader->aDoclist = pNext; - pReader->pOffsetList = 0; + return rc; +} - /* Check that the doclist does not appear to extend past the end of the - ** b-tree node. And that the final byte of the doclist is 0x00. If either - ** of these statements is untrue, then the data structure is corrupt. - */ - if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] - || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1]) - ){ - return FTS_CORRUPT_VTAB; - } - return SQLITE_OK; +/* +** Release all dynamic resources held by node-reader object *p. +*/ +static void nodeReaderRelease(NodeReader *p){ + sqlite3_free(p->term.a); } /* -** Set the SegReader to point to the first docid in the doclist associated -** with the current term. +** Initialize a node-reader object to read the node in buffer aNode/nNode. +** +** If successful, SQLITE_OK is returned and the NodeReader object set to +** point to the first entry on the node (if any). Otherwise, an SQLite +** error code is returned. */ -static int fts3SegReaderFirstDocid(Fts3Table *pTab, Fts3SegReader *pReader){ - int rc = SQLITE_OK; - assert( pReader->aDoclist ); - assert( !pReader->pOffsetList ); - if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ - u8 bEof = 0; - pReader->iDocid = 0; - pReader->nOffsetList = 0; - sqlite3Fts3DoclistPrev(0, - pReader->aDoclist, pReader->nDoclist, &pReader->pOffsetList, - &pReader->iDocid, &pReader->nOffsetList, &bEof - ); +static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ + memset(p, 0, sizeof(NodeReader)); + p->aNode = aNode; + p->nNode = nNode; + + /* Figure out if this is a leaf or an internal node. */ + if( p->aNode[0] ){ + /* An internal node. */ + p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); }else{ - rc = fts3SegReaderRequire(pReader, pReader->aDoclist, FTS3_VARINT_MAX); - if( rc==SQLITE_OK ){ - int n = sqlite3Fts3GetVarint(pReader->aDoclist, &pReader->iDocid); - pReader->pOffsetList = &pReader->aDoclist[n]; - } + p->iOff = 1; } - return rc; + + return nodeReaderNext(p); } /* -** Advance the SegReader to point to the next docid in the doclist -** associated with the current term. -** -** If arguments ppOffsetList and pnOffsetList are not NULL, then -** *ppOffsetList is set to point to the first column-offset list -** in the doclist entry (i.e. immediately past the docid varint). -** *pnOffsetList is set to the length of the set of column-offset -** lists, not including the nul-terminator byte. For example: +** This function is called while writing an FTS segment each time a leaf o +** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed +** to be greater than the largest key on the node just written, but smaller +** than or equal to the first key that will be written to the next leaf +** node. +** +** The block id of the leaf node just written to disk may be found in +** (pWriter->aNodeWriter[0].iBlock) when this function is called. */ -static int fts3SegReaderNextDocid( - Fts3Table *pTab, - Fts3SegReader *pReader, /* Reader to advance to next docid */ - char **ppOffsetList, /* OUT: Pointer to current position-list */ - int *pnOffsetList /* OUT: Length of *ppOffsetList in bytes */ +static int fts3IncrmergePush( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + const char *zTerm, /* Term to write to internal node */ + int nTerm /* Bytes at zTerm */ ){ - int rc = SQLITE_OK; - char *p = pReader->pOffsetList; - char c = 0; + sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; + int iLayer; - assert( p ); + assert( nTerm>0 ); + for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){ + sqlite3_int64 iNextPtr = 0; + NodeWriter *pNode = &pWriter->aNodeWriter[iLayer]; + int rc = SQLITE_OK; + int nPrefix; + int nSuffix; + int nSpace; - if( pTab->bDescIdx && fts3SegReaderIsPending(pReader) ){ - /* A pending-terms seg-reader for an FTS4 table that uses order=desc. - ** Pending-terms doclists are always built up in ascending order, so - ** we have to iterate through them backwards here. */ - u8 bEof = 0; - if( ppOffsetList ){ - *ppOffsetList = pReader->pOffsetList; - *pnOffsetList = pReader->nOffsetList - 1; - } - sqlite3Fts3DoclistPrev(0, - pReader->aDoclist, pReader->nDoclist, &p, &pReader->iDocid, - &pReader->nOffsetList, &bEof - ); - if( bEof ){ - pReader->pOffsetList = 0; - }else{ - pReader->pOffsetList = p; - } - }else{ - char *pEnd = &pReader->aDoclist[pReader->nDoclist]; + /* Figure out how much space the key will consume if it is written to + ** the current node of layer iLayer. Due to the prefix compression, + ** the space required changes depending on which node the key is to + ** be added to. */ + nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - /* Pointer p currently points at the first byte of an offset list. The - ** following block advances it to point one byte past the end of - ** the same offset list. */ - while( 1 ){ - - /* The following line of code (and the "p++" below the while() loop) is - ** normally all that is required to move pointer p to the desired - ** position. The exception is if this node is being loaded from disk - ** incrementally and pointer "p" now points to the first byte past - ** the populated part of pReader->aNode[]. - */ - while( *p | c ) c = *p++ & 0x80; - assert( *p==0 ); - - if( pReader->pBlob==0 || p<&pReader->aNode[pReader->nPopulate] ) break; - rc = fts3SegReaderIncrRead(pReader); - if( rc!=SQLITE_OK ) return rc; - } - p++; - - /* If required, populate the output variables with a pointer to and the - ** size of the previous offset-list. - */ - if( ppOffsetList ){ - *ppOffsetList = pReader->pOffsetList; - *pnOffsetList = (int)(p - pReader->pOffsetList - 1); - } + if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ + /* If the current node of layer iLayer contains zero keys, or if adding + ** the key to it will not cause it to grow to larger than nNodeSize + ** bytes in size, write the key here. */ - /* List may have been edited in place by fts3EvalNearTrim() */ - while( p<pEnd && *p==0 ) p++; - - /* If there are no more entries in the doclist, set pOffsetList to - ** NULL. Otherwise, set Fts3SegReader.iDocid to the next docid and - ** Fts3SegReader.pOffsetList to point to the next offset list before - ** returning. - */ - if( p>=pEnd ){ - pReader->pOffsetList = 0; - }else{ - rc = fts3SegReaderRequire(pReader, p, FTS3_VARINT_MAX); - if( rc==SQLITE_OK ){ - sqlite3_int64 iDelta; - pReader->pOffsetList = p + sqlite3Fts3GetVarint(p, &iDelta); - if( pTab->bDescIdx ){ - pReader->iDocid -= iDelta; - }else{ - pReader->iDocid += iDelta; + Blob *pBlk = &pNode->block; + if( pBlk->n==0 ){ + blobGrowBuffer(pBlk, p->nNodeSize, &rc); + if( rc==SQLITE_OK ){ + pBlk->a[0] = (char)iLayer; + pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); } } - } - } - - return SQLITE_OK; -} + blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); + blobGrowBuffer(&pNode->key, nTerm, &rc); + if( rc==SQLITE_OK ){ + if( pNode->key.n ){ + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); + } + pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); + memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); + pBlk->n += nSuffix; -SQLITE_PRIVATE int sqlite3Fts3MsrOvfl( - Fts3Cursor *pCsr, - Fts3MultiSegReader *pMsr, - int *pnOvfl -){ - Fts3Table *p = (Fts3Table*)pCsr->base.pVtab; - int nOvfl = 0; - int ii; - int rc = SQLITE_OK; - int pgsz = p->nPgsz; + memcpy(pNode->key.a, zTerm, nTerm); + pNode->key.n = nTerm; + } + }else{ + /* Otherwise, flush the current node of layer iLayer to disk. + ** Then allocate a new, empty sibling node. The key will be written + ** into the parent of this node. */ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); - assert( p->bFts4 ); - assert( pgsz>0 ); + assert( pNode->block.nAlloc>=p->nNodeSize ); + pNode->block.a[0] = (char)iLayer; + pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); - for(ii=0; rc==SQLITE_OK && ii<pMsr->nSegment; ii++){ - Fts3SegReader *pReader = pMsr->apSegment[ii]; - if( !fts3SegReaderIsPending(pReader) - && !fts3SegReaderIsRootOnly(pReader) - ){ - sqlite3_int64 jj; - for(jj=pReader->iStartBlock; jj<=pReader->iLeafEndBlock; jj++){ - int nBlob; - rc = sqlite3Fts3ReadBlock(p, jj, 0, &nBlob, 0); - if( rc!=SQLITE_OK ) break; - if( (nBlob+35)>pgsz ){ - nOvfl += (nBlob + 34)/pgsz; - } - } + iNextPtr = pNode->iBlock; + pNode->iBlock++; + pNode->key.n = 0; } - } - *pnOvfl = nOvfl; - return rc; -} -/* -** Free all allocations associated with the iterator passed as the -** second argument. -*/ -SQLITE_PRIVATE void sqlite3Fts3SegReaderFree(Fts3SegReader *pReader){ - if( pReader && !fts3SegReaderIsPending(pReader) ){ - sqlite3_free(pReader->zTerm); - if( !fts3SegReaderIsRootOnly(pReader) ){ - sqlite3_free(pReader->aNode); - sqlite3_blob_close(pReader->pBlob); - } + if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; + iPtr = iNextPtr; } - sqlite3_free(pReader); + + assert( 0 ); + return 0; } /* -** Allocate a new SegReader object. +** Append a term and (optionally) doclist to the FTS segment node currently +** stored in blob *pNode. The node need not contain any terms, but the +** header must be written before this function is called. +** +** A node header is a single 0x00 byte for a leaf node, or a height varint +** followed by the left-hand-child varint for an internal node. +** +** The term to be appended is passed via arguments zTerm/nTerm. For a +** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal +** node, both aDoclist and nDoclist must be passed 0. +** +** If the size of the value in blob pPrev is zero, then this is the first +** term written to the node. Otherwise, pPrev contains a copy of the +** previous term. Before this function returns, it is updated to contain a +** copy of zTerm/nTerm. +** +** It is assumed that the buffer associated with pNode is already large +** enough to accommodate the new entry. The buffer associated with pPrev +** is extended by this function if requrired. +** +** If an error (i.e. OOM condition) occurs, an SQLite error code is +** returned. Otherwise, SQLITE_OK. */ -SQLITE_PRIVATE int sqlite3Fts3SegReaderNew( - int iAge, /* Segment "age". */ - int bLookup, /* True for a lookup only */ - sqlite3_int64 iStartLeaf, /* First leaf to traverse */ - sqlite3_int64 iEndLeaf, /* Final leaf to traverse */ - sqlite3_int64 iEndBlock, /* Final block of segment */ - const char *zRoot, /* Buffer containing root node */ - int nRoot, /* Size of buffer containing root node */ - Fts3SegReader **ppReader /* OUT: Allocated Fts3SegReader */ +static int fts3AppendToNode( + Blob *pNode, /* Current node image to append to */ + Blob *pPrev, /* Buffer containing previous term written */ + const char *zTerm, /* New term to write */ + int nTerm, /* Size of zTerm in bytes */ + const char *aDoclist, /* Doclist (or NULL) to write */ + int nDoclist /* Size of aDoclist in bytes */ ){ - Fts3SegReader *pReader; /* Newly allocated SegReader object */ - int nExtra = 0; /* Bytes to allocate segment root node */ + int rc = SQLITE_OK; /* Return code */ + int bFirst = (pPrev->n==0); /* True if this is the first term written */ + int nPrefix; /* Size of term prefix in bytes */ + int nSuffix; /* Size of term suffix in bytes */ - assert( iStartLeaf<=iEndLeaf ); - if( iStartLeaf==0 ){ - nExtra = nRoot + FTS3_NODE_PADDING; - } + /* Node must have already been started. There must be a doclist for a + ** leaf node, and there must not be a doclist for an internal node. */ + assert( pNode->n>0 ); + assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); - pReader = (Fts3SegReader *)sqlite3_malloc(sizeof(Fts3SegReader) + nExtra); - if( !pReader ){ - return SQLITE_NOMEM; + blobGrowBuffer(pPrev, nTerm, &rc); + if( rc!=SQLITE_OK ) return rc; + + nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + memcpy(pPrev->a, zTerm, nTerm); + pPrev->n = nTerm; + + if( bFirst==0 ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); } - memset(pReader, 0, sizeof(Fts3SegReader)); - pReader->iIdx = iAge; - pReader->bLookup = bLookup!=0; - pReader->iStartBlock = iStartLeaf; - pReader->iLeafEndBlock = iEndLeaf; - pReader->iEndBlock = iEndBlock; + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); + memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); + pNode->n += nSuffix; - if( nExtra ){ - /* The entire segment is stored in the root node. */ - pReader->aNode = (char *)&pReader[1]; - pReader->rootOnly = 1; - pReader->nNode = nRoot; - memcpy(pReader->aNode, zRoot, nRoot); - memset(&pReader->aNode[nRoot], 0, FTS3_NODE_PADDING); - }else{ - pReader->iCurrentBlock = iStartLeaf-1; + if( aDoclist ){ + pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); + memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); + pNode->n += nDoclist; } - *ppReader = pReader; + + assert( pNode->n<=pNode->nAlloc ); + return SQLITE_OK; } /* -** This is a comparison function used as a qsort() callback when sorting -** an array of pending terms by term. This occurs as part of flushing -** the contents of the pending-terms hash table to the database. +** Append the current term and doclist pointed to by cursor pCsr to the +** appendable b-tree segment opened for writing by pWriter. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. */ -static int SQLITE_CDECL fts3CompareElemByTerm( - const void *lhs, - const void *rhs +static int fts3IncrmergeAppend( + Fts3Table *p, /* Fts3 table handle */ + IncrmergeWriter *pWriter, /* Writer object */ + Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ ){ - char *z1 = fts3HashKey(*(Fts3HashElem **)lhs); - char *z2 = fts3HashKey(*(Fts3HashElem **)rhs); - int n1 = fts3HashKeysize(*(Fts3HashElem **)lhs); - int n2 = fts3HashKeysize(*(Fts3HashElem **)rhs); + const char *zTerm = pCsr->zTerm; + int nTerm = pCsr->nTerm; + const char *aDoclist = pCsr->aDoclist; + int nDoclist = pCsr->nDoclist; + int rc = SQLITE_OK; /* Return code */ + int nSpace; /* Total space in bytes required on leaf */ + int nPrefix; /* Size of prefix shared with previous term */ + int nSuffix; /* Size of suffix (nTerm - nPrefix) */ + NodeWriter *pLeaf; /* Object used to write leaf nodes */ - int n = (n1<n2 ? n1 : n2); - int c = memcmp(z1, z2, n); - if( c==0 ){ - c = n1 - n2; + pLeaf = &pWriter->aNodeWriter[0]; + nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); + nSuffix = nTerm - nPrefix; + + nSpace = sqlite3Fts3VarintLen(nPrefix); + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; + + /* If the current block is not empty, and if adding this term/doclist + ** to the current block would make it larger than Fts3Table.nNodeSize + ** bytes, write this block out to the database. */ + if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ + rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); + pWriter->nWork++; + + /* Add the current term to the parent node. The term added to the + ** parent must: + ** + ** a) be greater than the largest term on the leaf node just written + ** to the database (still available in pLeaf->key), and + ** + ** b) be less than or equal to the term about to be added to the new + ** leaf node (zTerm/nTerm). + ** + ** In other words, it must be the prefix of zTerm 1 byte longer than + ** the common prefix (if any) of zTerm and pWriter->zTerm. + */ + if( rc==SQLITE_OK ){ + rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); + } + + /* Advance to the next output block */ + pLeaf->iBlock++; + pLeaf->key.n = 0; + pLeaf->block.n = 0; + + nSuffix = nTerm; + nSpace = 1; + nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; } - return c; + + pWriter->nLeafData += nSpace; + blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); + if( rc==SQLITE_OK ){ + if( pLeaf->block.n==0 ){ + pLeaf->block.n = 1; + pLeaf->block.a[0] = '\0'; + } + rc = fts3AppendToNode( + &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist + ); + } + + return rc; } /* -** This function is used to allocate an Fts3SegReader that iterates through -** a subset of the terms stored in the Fts3Table.pendingTerms array. -** -** If the isPrefixIter parameter is zero, then the returned SegReader iterates -** through each term in the pending-terms table. Or, if isPrefixIter is -** non-zero, it iterates through each term and its prefixes. For example, if -** the pending terms hash table contains the terms "sqlite", "mysql" and -** "firebird", then the iterator visits the following 'terms' (in the order -** shown): -** -** f fi fir fire fireb firebi firebir firebird -** m my mys mysq mysql -** s sq sql sqli sqlit sqlite +** This function is called to release all dynamic resources held by the +** merge-writer object pWriter, and if no error has occurred, to flush +** all outstanding node buffers held by pWriter to disk. ** -** Whereas if isPrefixIter is zero, the terms visited are: +** If *pRc is not SQLITE_OK when this function is called, then no attempt +** is made to write any data to disk. Instead, this function serves only +** to release outstanding resources. ** -** firebird mysql sqlite +** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while +** flushing buffers to disk, *pRc is set to an SQLite error code before +** returning. */ -SQLITE_PRIVATE int sqlite3Fts3SegReaderPending( - Fts3Table *p, /* Virtual table handle */ - int iIndex, /* Index for p->aIndex */ - const char *zTerm, /* Term to search for */ - int nTerm, /* Size of buffer zTerm */ - int bPrefix, /* True for a prefix iterator */ - Fts3SegReader **ppReader /* OUT: SegReader for pending-terms */ +static void fts3IncrmergeRelease( + Fts3Table *p, /* FTS3 table handle */ + IncrmergeWriter *pWriter, /* Merge-writer object */ + int *pRc /* IN/OUT: Error code */ ){ - Fts3SegReader *pReader = 0; /* Fts3SegReader object to return */ - Fts3HashElem *pE; /* Iterator variable */ - Fts3HashElem **aElem = 0; /* Array of term hash entries to scan */ - int nElem = 0; /* Size of array at aElem */ - int rc = SQLITE_OK; /* Return Code */ - Fts3Hash *pHash; - - pHash = &p->aIndex[iIndex].hPending; - if( bPrefix ){ - int nAlloc = 0; /* Size of allocated array at aElem */ - - for(pE=fts3HashFirst(pHash); pE; pE=fts3HashNext(pE)){ - char *zKey = (char *)fts3HashKey(pE); - int nKey = fts3HashKeysize(pE); - if( nTerm==0 || (nKey>=nTerm && 0==memcmp(zKey, zTerm, nTerm)) ){ - if( nElem==nAlloc ){ - Fts3HashElem **aElem2; - nAlloc += 16; - aElem2 = (Fts3HashElem **)sqlite3_realloc( - aElem, nAlloc*sizeof(Fts3HashElem *) - ); - if( !aElem2 ){ - rc = SQLITE_NOMEM; - nElem = 0; - break; - } - aElem = aElem2; - } + int i; /* Used to iterate through non-root layers */ + int iRoot; /* Index of root in pWriter->aNodeWriter */ + NodeWriter *pRoot; /* NodeWriter for root node */ + int rc = *pRc; /* Error code */ - aElem[nElem++] = pE; - } - } + /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment + ** root node. If the segment fits entirely on a single leaf node, iRoot + ** will be set to 0. If the root node is the parent of the leaves, iRoot + ** will be 1. And so on. */ + for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ + NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; + if( pNode->block.n>0 ) break; + assert( *pRc || pNode->block.nAlloc==0 ); + assert( *pRc || pNode->key.nAlloc==0 ); + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); + } - /* If more than one term matches the prefix, sort the Fts3HashElem - ** objects in term order using qsort(). This uses the same comparison - ** callback as is used when flushing terms to disk. - */ - if( nElem>1 ){ - qsort(aElem, nElem, sizeof(Fts3HashElem *), fts3CompareElemByTerm); - } + /* Empty output segment. This is a no-op. */ + if( iRoot<0 ) return; - }else{ - /* The query is a simple term lookup that matches at most one term in - ** the index. All that is required is a straight hash-lookup. - ** - ** Because the stack address of pE may be accessed via the aElem pointer - ** below, the "Fts3HashElem *pE" must be declared so that it is valid - ** within this entire function, not just this "else{...}" block. - */ - pE = fts3HashFindElem(pHash, zTerm, nTerm); - if( pE ){ - aElem = &pE; - nElem = 1; + /* The entire output segment fits on a single node. Normally, this means + ** the node would be stored as a blob in the "root" column of the %_segdir + ** table. However, this is not permitted in this case. The problem is that + ** space has already been reserved in the %_segments table, and so the + ** start_block and end_block fields of the %_segdir table must be populated. + ** And, by design or by accident, released versions of FTS cannot handle + ** segments that fit entirely on the root node with start_block!=0. + ** + ** Instead, create a synthetic root node that contains nothing but a + ** pointer to the single content node. So that the segment consists of a + ** single leaf and a single interior (root) node. + ** + ** Todo: Better might be to defer allocating space in the %_segments + ** table until we are sure it is needed. + */ + if( iRoot==0 ){ + Blob *pBlock = &pWriter->aNodeWriter[1].block; + blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); + if( rc==SQLITE_OK ){ + pBlock->a[0] = 0x01; + pBlock->n = 1 + sqlite3Fts3PutVarint( + &pBlock->a[1], pWriter->aNodeWriter[0].iBlock + ); } + iRoot = 1; } + pRoot = &pWriter->aNodeWriter[iRoot]; - if( nElem>0 ){ - int nByte = sizeof(Fts3SegReader) + (nElem+1)*sizeof(Fts3HashElem *); - pReader = (Fts3SegReader *)sqlite3_malloc(nByte); - if( !pReader ){ - rc = SQLITE_NOMEM; - }else{ - memset(pReader, 0, nByte); - pReader->iIdx = 0x7FFFFFFF; - pReader->ppNextElem = (Fts3HashElem **)&pReader[1]; - memcpy(pReader->ppNextElem, aElem, nElem*sizeof(Fts3HashElem *)); + /* Flush all currently outstanding nodes to disk. */ + for(i=0; i<iRoot; i++){ + NodeWriter *pNode = &pWriter->aNodeWriter[i]; + if( pNode->block.n>0 && rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); } + sqlite3_free(pNode->block.a); + sqlite3_free(pNode->key.a); } - if( bPrefix ){ - sqlite3_free(aElem); + /* Write the %_segdir record. */ + if( rc==SQLITE_OK ){ + rc = fts3WriteSegdir(p, + pWriter->iAbsLevel+1, /* level */ + pWriter->iIdx, /* idx */ + pWriter->iStart, /* start_block */ + pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ + pWriter->iEnd, /* end_block */ + (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ + pRoot->block.a, pRoot->block.n /* root */ + ); } - *ppReader = pReader; - return rc; + sqlite3_free(pRoot->block.a); + sqlite3_free(pRoot->key.a); + + *pRc = rc; } /* -** Compare the entries pointed to by two Fts3SegReader structures. -** Comparison is as follows: -** -** 1) EOF is greater than not EOF. -** -** 2) The current terms (if any) are compared using memcmp(). If one -** term is a prefix of another, the longer term is considered the -** larger. +** Compare the term in buffer zLhs (size in bytes nLhs) with that in +** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of +** the other, it is considered to be smaller than the other. ** -** 3) By segment age. An older segment is considered larger. +** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve +** if it is greater. */ -static int fts3SegReaderCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc; - if( pLhs->aNode && pRhs->aNode ){ - int rc2 = pLhs->nTerm - pRhs->nTerm; - if( rc2<0 ){ - rc = memcmp(pLhs->zTerm, pRhs->zTerm, pLhs->nTerm); - }else{ - rc = memcmp(pLhs->zTerm, pRhs->zTerm, pRhs->nTerm); - } - if( rc==0 ){ - rc = rc2; - } - }else{ - rc = (pLhs->aNode==0) - (pRhs->aNode==0); - } - if( rc==0 ){ - rc = pRhs->iIdx - pLhs->iIdx; - } - assert( rc!=0 ); - return rc; +static int fts3TermCmp( + const char *zLhs, int nLhs, /* LHS of comparison */ + const char *zRhs, int nRhs /* RHS of comparison */ +){ + int nCmp = MIN(nLhs, nRhs); + int res; + + res = memcmp(zLhs, zRhs, nCmp); + if( res==0 ) res = nLhs - nRhs; + + return res; } + /* -** A different comparison function for SegReader structures. In this -** version, it is assumed that each SegReader points to an entry in -** a doclist for identical terms. Comparison is made as follows: -** -** 1) EOF (end of doclist in this case) is greater than not EOF. +** Query to see if the entry in the %_segments table with blockid iEnd is +** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before +** returning. Otherwise, set *pbRes to 0. ** -** 2) By current docid. +** Or, if an error occurs while querying the database, return an SQLite +** error code. The final value of *pbRes is undefined in this case. ** -** 3) By segment age. An older segment is considered larger. +** This is used to test if a segment is an "appendable" segment. If it +** is, then a NULL entry has been inserted into the %_segments table +** with blockid %_segdir.end_block. */ -static int fts3SegReaderDoclistCmp(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); - if( rc==0 ){ - if( pLhs->iDocid==pRhs->iDocid ){ - rc = pRhs->iIdx - pLhs->iIdx; - }else{ - rc = (pLhs->iDocid > pRhs->iDocid) ? 1 : -1; - } - } - assert( pLhs->aNode && pRhs->aNode ); - return rc; -} -static int fts3SegReaderDoclistCmpRev(Fts3SegReader *pLhs, Fts3SegReader *pRhs){ - int rc = (pLhs->pOffsetList==0)-(pRhs->pOffsetList==0); - if( rc==0 ){ - if( pLhs->iDocid==pRhs->iDocid ){ - rc = pRhs->iIdx - pLhs->iIdx; - }else{ - rc = (pLhs->iDocid < pRhs->iDocid) ? 1 : -1; - } +static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ + int bRes = 0; /* Result to set *pbRes to */ + sqlite3_stmt *pCheck = 0; /* Statement to query database with */ + int rc; /* Return code */ + + rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pCheck, 1, iEnd); + if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; + rc = sqlite3_reset(pCheck); } - assert( pLhs->aNode && pRhs->aNode ); + + *pbRes = bRes; return rc; } /* -** Compare the term that the Fts3SegReader object passed as the first argument -** points to with the term specified by arguments zTerm and nTerm. +** This function is called when initializing an incremental-merge operation. +** It checks if the existing segment with index value iIdx at absolute level +** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the +** merge-writer object *pWriter is initialized to write to it. ** -** If the pSeg iterator is already at EOF, return 0. Otherwise, return -** -ve if the pSeg term is less than zTerm/nTerm, 0 if the two terms are -** equal, or +ve if the pSeg term is greater than zTerm/nTerm. +** An existing segment can be appended to by an incremental merge if: +** +** * It was initially created as an appendable segment (with all required +** space pre-allocated), and +** +** * The first key read from the input (arguments zKey and nKey) is +** greater than the largest key currently stored in the potential +** output segment. */ -static int fts3SegReaderTermCmp( - Fts3SegReader *pSeg, /* Segment reader object */ - const char *zTerm, /* Term to compare to */ - int nTerm /* Size of term zTerm in bytes */ +static int fts3IncrmergeLoad( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of candidate output segment */ + const char *zKey, /* First key to write */ + int nKey, /* Number of bytes in nKey */ + IncrmergeWriter *pWriter /* Populate this object */ ){ - int res = 0; - if( pSeg->aNode ){ - if( pSeg->nTerm>nTerm ){ - res = memcmp(pSeg->zTerm, zTerm, nTerm); + int rc; /* Return code */ + sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ + + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); + if( rc==SQLITE_OK ){ + sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ + sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ + sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ + const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ + int nRoot = 0; /* Size of aRoot[] in bytes */ + int rc2; /* Return code from sqlite3_reset() */ + int bAppendable = 0; /* Set to true if segment is appendable */ + + /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ + sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); + sqlite3_bind_int(pSelect, 2, iIdx); + if( sqlite3_step(pSelect)==SQLITE_ROW ){ + iStart = sqlite3_column_int64(pSelect, 1); + iLeafEnd = sqlite3_column_int64(pSelect, 2); + fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); + if( pWriter->nLeafData<0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + pWriter->bNoLeafData = (pWriter->nLeafData==0); + nRoot = sqlite3_column_bytes(pSelect, 4); + aRoot = sqlite3_column_blob(pSelect, 4); }else{ - res = memcmp(pSeg->zTerm, zTerm, pSeg->nTerm); + return sqlite3_reset(pSelect); } - if( res==0 ){ - res = pSeg->nTerm-nTerm; + + /* Check for the zero-length marker in the %_segments table */ + rc = fts3IsAppendable(p, iEnd, &bAppendable); + + /* Check that zKey/nKey is larger than the largest key the candidate */ + if( rc==SQLITE_OK && bAppendable ){ + char *aLeaf = 0; + int nLeaf = 0; + + rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); + if( rc==SQLITE_OK ){ + NodeReader reader; + for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + assert( reader.aNode ); + } + if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ + bAppendable = 0; + } + nodeReaderRelease(&reader); + } + sqlite3_free(aLeaf); } - } - return res; -} -/* -** Argument apSegment is an array of nSegment elements. It is known that -** the final (nSegment-nSuspect) members are already in sorted order -** (according to the comparison function provided). This function shuffles -** the array around until all entries are in sorted order. -*/ -static void fts3SegReaderSort( - Fts3SegReader **apSegment, /* Array to sort entries of */ - int nSegment, /* Size of apSegment array */ - int nSuspect, /* Unsorted entry count */ - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) /* Comparison function */ -){ - int i; /* Iterator variable */ + if( rc==SQLITE_OK && bAppendable ){ + /* It is possible to append to this segment. Set up the IncrmergeWriter + ** object to do so. */ + int i; + int nHeight = (int)aRoot[0]; + NodeWriter *pNode; - assert( nSuspect<=nSegment ); + pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; + pWriter->iStart = iStart; + pWriter->iEnd = iEnd; + pWriter->iAbsLevel = iAbsLevel; + pWriter->iIdx = iIdx; - if( nSuspect==nSegment ) nSuspect--; - for(i=nSuspect-1; i>=0; i--){ - int j; - for(j=i; j<(nSegment-1); j++){ - Fts3SegReader *pTmp; - if( xCmp(apSegment[j], apSegment[j+1])<0 ) break; - pTmp = apSegment[j+1]; - apSegment[j+1] = apSegment[j]; - apSegment[j] = pTmp; + for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ + pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + } + + pNode = &pWriter->aNodeWriter[nHeight]; + pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; + blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aRoot, nRoot); + pNode->block.n = nRoot; + } + + for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ + NodeReader reader; + pNode = &pWriter->aNodeWriter[i]; + + rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); + while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); + blobGrowBuffer(&pNode->key, reader.term.n, &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->key.a, reader.term.a, reader.term.n); + pNode->key.n = reader.term.n; + if( i>0 ){ + char *aBlock = 0; + int nBlock = 0; + pNode = &pWriter->aNodeWriter[i-1]; + pNode->iBlock = reader.iChild; + rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); + blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); + if( rc==SQLITE_OK ){ + memcpy(pNode->block.a, aBlock, nBlock); + pNode->block.n = nBlock; + } + sqlite3_free(aBlock); + } + } + nodeReaderRelease(&reader); + } } - } -#ifndef NDEBUG - /* Check that the list really is sorted now. */ - for(i=0; i<(nSuspect-1); i++){ - assert( xCmp(apSegment[i], apSegment[i+1])<0 ); + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; } -#endif -} -/* -** Insert a record into the %_segments table. -*/ -static int fts3WriteSegment( - Fts3Table *p, /* Virtual table handle */ - sqlite3_int64 iBlock, /* Block id for new block */ - char *z, /* Pointer to buffer containing block data */ - int n /* Size of buffer z in bytes */ -){ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_INSERT_SEGMENTS, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iBlock); - sqlite3_bind_blob(pStmt, 2, z, n, SQLITE_STATIC); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - } return rc; } /* -** Find the largest relative level number in the table. If successful, set -** *pnMax to this value and return SQLITE_OK. Otherwise, if an error occurs, -** set *pnMax to zero and return an SQLite error code. +** Determine the largest segment index value that exists within absolute +** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus +** one before returning SQLITE_OK. Or, if there are no segments at all +** within level iAbsLevel, set *piIdx to zero. +** +** If an error occurs, return an SQLite error code. The final value of +** *piIdx is undefined in this case. */ -SQLITE_PRIVATE int sqlite3Fts3MaxLevel(Fts3Table *p, int *pnMax){ +static int fts3IncrmergeOutputIdx( + Fts3Table *p, /* FTS Table handle */ + sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ + int *piIdx /* OUT: Next free index at iAbsLevel+1 */ +){ int rc; - int mxLevel = 0; - sqlite3_stmt *pStmt = 0; + sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ - rc = fts3SqlStmt(p, SQL_SELECT_MXLEVEL, &pStmt, 0); + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - mxLevel = sqlite3_column_int(pStmt, 0); - } - rc = sqlite3_reset(pStmt); + sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); + sqlite3_step(pOutputIdx); + *piIdx = sqlite3_column_int(pOutputIdx, 0); + rc = sqlite3_reset(pOutputIdx); } - *pnMax = mxLevel; + return rc; } /* -** Insert a record into the %_segdir table. +** Allocate an appendable output segment on absolute level iAbsLevel+1 +** with idx value iIdx. +** +** In the %_segdir table, a segment is defined by the values in three +** columns: +** +** start_block +** leaves_end_block +** end_block +** +** When an appendable segment is allocated, it is estimated that the +** maximum number of leaf blocks that may be required is the sum of the +** number of leaf blocks consumed by the input segments, plus the number +** of input segments, multiplied by two. This value is stored in stack +** variable nLeafEst. +** +** A total of 16*nLeafEst blocks are allocated when an appendable segment +** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous +** array of leaf nodes starts at the first block allocated. The array +** of interior nodes that are parents of the leaf nodes start at block +** (start_block + (1 + end_block - start_block) / 16). And so on. +** +** In the actual code below, the value "16" is replaced with the +** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. */ -static int fts3WriteSegdir( - Fts3Table *p, /* Virtual table handle */ - sqlite3_int64 iLevel, /* Value for "level" field (absolute level) */ - int iIdx, /* Value for "idx" field */ - sqlite3_int64 iStartBlock, /* Value for "start_block" field */ - sqlite3_int64 iLeafEndBlock, /* Value for "leaves_end_block" field */ - sqlite3_int64 iEndBlock, /* Value for "end_block" field */ - sqlite3_int64 nLeafData, /* Bytes of leaf data in segment */ - char *zRoot, /* Blob value for "root" field */ - int nRoot /* Number of bytes in buffer zRoot */ +static int fts3IncrmergeWriter( + Fts3Table *p, /* Fts3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ + int iIdx, /* Index of new output segment */ + Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ + IncrmergeWriter *pWriter /* Populate this object */ ){ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_INSERT_SEGDIR, &pStmt, 0); + int rc; /* Return Code */ + int i; /* Iterator variable */ + int nLeafEst = 0; /* Blocks allocated for leaf nodes */ + sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ + sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ + + /* Calculate nLeafEst. */ + rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pStmt, 1, iLevel); - sqlite3_bind_int(pStmt, 2, iIdx); - sqlite3_bind_int64(pStmt, 3, iStartBlock); - sqlite3_bind_int64(pStmt, 4, iLeafEndBlock); - if( nLeafData==0 ){ - sqlite3_bind_int64(pStmt, 5, iEndBlock); - }else{ - char *zEnd = sqlite3_mprintf("%lld %lld", iEndBlock, nLeafData); - if( !zEnd ) return SQLITE_NOMEM; - sqlite3_bind_text(pStmt, 5, zEnd, -1, sqlite3_free); + sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); + sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); + if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ + nLeafEst = sqlite3_column_int(pLeafEst, 0); } - sqlite3_bind_blob(pStmt, 6, zRoot, nRoot, SQLITE_STATIC); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); + rc = sqlite3_reset(pLeafEst); } - return rc; + if( rc!=SQLITE_OK ) return rc; + + /* Calculate the first block to use in the output segment */ + rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ + pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); + pWriter->iEnd = pWriter->iStart - 1; + pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; + } + rc = sqlite3_reset(pFirstBlock); + } + if( rc!=SQLITE_OK ) return rc; + + /* Insert the marker in the %_segments table to make sure nobody tries + ** to steal the space just allocated. This is also used to identify + ** appendable segments. */ + rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); + if( rc!=SQLITE_OK ) return rc; + + pWriter->iAbsLevel = iAbsLevel; + pWriter->nLeafEst = nLeafEst; + pWriter->iIdx = iIdx; + + /* Set up the array of NodeWriter objects */ + for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ + pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + } + return SQLITE_OK; } /* -** Return the size of the common prefix (if any) shared by zPrev and -** zNext, in bytes. For example, +** Remove an entry from the %_segdir table. This involves running the +** following two statements: ** -** fts3PrefixCompress("abc", 3, "abcdef", 6) // returns 3 -** fts3PrefixCompress("abX", 3, "abcdef", 6) // returns 2 -** fts3PrefixCompress("abX", 3, "Xbcdef", 6) // returns 0 +** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx +** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx +** +** The DELETE statement removes the specific %_segdir level. The UPDATE +** statement ensures that the remaining segments have contiguously allocated +** idx values. */ -static int fts3PrefixCompress( - const char *zPrev, /* Buffer containing previous term */ - int nPrev, /* Size of buffer zPrev in bytes */ - const char *zNext, /* Buffer containing next term */ - int nNext /* Size of buffer zNext in bytes */ +static int fts3RemoveSegdirEntry( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ + int iIdx /* Index of %_segdir entry to delete */ ){ - int n; - UNUSED_PARAMETER(nNext); - for(n=0; n<nPrev && zPrev[n]==zNext[n]; n++); - return n; + int rc; /* Return code */ + sqlite3_stmt *pDelete = 0; /* DELETE statement */ + + rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDelete, 1, iAbsLevel); + sqlite3_bind_int(pDelete, 2, iIdx); + sqlite3_step(pDelete); + rc = sqlite3_reset(pDelete); + } + + return rc; } /* -** Add term zTerm to the SegmentNode. It is guaranteed that zTerm is larger -** (according to memcmp) than the previous term. +** One or more segments have just been removed from absolute level iAbsLevel. +** Update the 'idx' values of the remaining segments in the level so that +** the idx values are a contiguous sequence starting from 0. */ -static int fts3NodeAddTerm( - Fts3Table *p, /* Virtual table handle */ - SegmentNode **ppTree, /* IN/OUT: SegmentNode handle */ - int isCopyTerm, /* True if zTerm/nTerm is transient */ - const char *zTerm, /* Pointer to buffer containing term */ - int nTerm /* Size of term in bytes */ +static int fts3RepackSegdirLevel( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel /* Absolute level to repack */ ){ - SegmentNode *pTree = *ppTree; - int rc; - SegmentNode *pNew; - - /* First try to append the term to the current node. Return early if - ** this is possible. - */ - if( pTree ){ - int nData = pTree->nData; /* Current size of node in bytes */ - int nReq = nData; /* Required space after adding zTerm */ - int nPrefix; /* Number of bytes of prefix compression */ - int nSuffix; /* Suffix length */ - - nPrefix = fts3PrefixCompress(pTree->zTerm, pTree->nTerm, zTerm, nTerm); - nSuffix = nTerm-nPrefix; - - nReq += sqlite3Fts3VarintLen(nPrefix)+sqlite3Fts3VarintLen(nSuffix)+nSuffix; - if( nReq<=p->nNodeSize || !pTree->zTerm ){ - - if( nReq>p->nNodeSize ){ - /* An unusual case: this is the first term to be added to the node - ** and the static node buffer (p->nNodeSize bytes) is not large - ** enough. Use a separately malloced buffer instead This wastes - ** p->nNodeSize bytes, but since this scenario only comes about when - ** the database contain two terms that share a prefix of almost 2KB, - ** this is not expected to be a serious problem. - */ - assert( pTree->aData==(char *)&pTree[1] ); - pTree->aData = (char *)sqlite3_malloc(nReq); - if( !pTree->aData ){ - return SQLITE_NOMEM; - } - } - - if( pTree->zTerm ){ - /* There is no prefix-length field for first term in a node */ - nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nPrefix); - } - - nData += sqlite3Fts3PutVarint(&pTree->aData[nData], nSuffix); - memcpy(&pTree->aData[nData], &zTerm[nPrefix], nSuffix); - pTree->nData = nData + nSuffix; - pTree->nEntry++; + int rc; /* Return code */ + int *aIdx = 0; /* Array of remaining idx values */ + int nIdx = 0; /* Valid entries in aIdx[] */ + int nAlloc = 0; /* Allocated size of aIdx[] */ + int i; /* Iterator variable */ + sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ + sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ - if( isCopyTerm ){ - if( pTree->nMalloc<nTerm ){ - char *zNew = sqlite3_realloc(pTree->zMalloc, nTerm*2); - if( !zNew ){ - return SQLITE_NOMEM; - } - pTree->nMalloc = nTerm*2; - pTree->zMalloc = zNew; + rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int64(pSelect, 1, iAbsLevel); + while( SQLITE_ROW==sqlite3_step(pSelect) ){ + if( nIdx>=nAlloc ){ + int *aNew; + nAlloc += 16; + aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); + if( !aNew ){ + rc = SQLITE_NOMEM; + break; } - pTree->zTerm = pTree->zMalloc; - memcpy(pTree->zTerm, zTerm, nTerm); - pTree->nTerm = nTerm; - }else{ - pTree->zTerm = (char *)zTerm; - pTree->nTerm = nTerm; + aIdx = aNew; } - return SQLITE_OK; + aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); } + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; } - /* If control flows to here, it was not possible to append zTerm to the - ** current node. Create a new node (a right-sibling of the current node). - ** If this is the first node in the tree, the term is added to it. - ** - ** Otherwise, the term is not added to the new node, it is left empty for - ** now. Instead, the term is inserted into the parent of pTree. If pTree - ** has no parent, one is created here. - */ - pNew = (SegmentNode *)sqlite3_malloc(sizeof(SegmentNode) + p->nNodeSize); - if( !pNew ){ - return SQLITE_NOMEM; + if( rc==SQLITE_OK ){ + rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); + } + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pUpdate, 2, iAbsLevel); } - memset(pNew, 0, sizeof(SegmentNode)); - pNew->nData = 1 + FTS3_VARINT_MAX; - pNew->aData = (char *)&pNew[1]; - if( pTree ){ - SegmentNode *pParent = pTree->pParent; - rc = fts3NodeAddTerm(p, &pParent, isCopyTerm, zTerm, nTerm); - if( pTree->pParent==0 ){ - pTree->pParent = pParent; + assert( p->bIgnoreSavepoint==0 ); + p->bIgnoreSavepoint = 1; + for(i=0; rc==SQLITE_OK && i<nIdx; i++){ + if( aIdx[i]!=i ){ + sqlite3_bind_int(pUpdate, 3, aIdx[i]); + sqlite3_bind_int(pUpdate, 1, i); + sqlite3_step(pUpdate); + rc = sqlite3_reset(pUpdate); } - pTree->pRight = pNew; - pNew->pLeftmost = pTree->pLeftmost; - pNew->pParent = pParent; - pNew->zMalloc = pTree->zMalloc; - pNew->nMalloc = pTree->nMalloc; - pTree->zMalloc = 0; - }else{ - pNew->pLeftmost = pNew; - rc = fts3NodeAddTerm(p, &pNew, isCopyTerm, zTerm, nTerm); } + p->bIgnoreSavepoint = 0; - *ppTree = pNew; + sqlite3_free(aIdx); return rc; } -/* -** Helper function for fts3NodeWrite(). -*/ -static int fts3TreeFinishNode( - SegmentNode *pTree, - int iHeight, - sqlite3_int64 iLeftChild -){ - int nStart; - assert( iHeight>=1 && iHeight<128 ); - nStart = FTS3_VARINT_MAX - sqlite3Fts3VarintLen(iLeftChild); - pTree->aData[nStart] = (char)iHeight; - sqlite3Fts3PutVarint(&pTree->aData[nStart+1], iLeftChild); - return nStart; +static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ + pNode->a[0] = (char)iHeight; + if( iChild ){ + assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); + pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); + }else{ + assert( pNode->nAlloc>=1 ); + pNode->n = 1; + } } /* -** Write the buffer for the segment node pTree and all of its peers to the -** database. Then call this function recursively to write the parent of -** pTree and its peers to the database. -** -** Except, if pTree is a root node, do not write it to the database. Instead, -** set output variables *paRoot and *pnRoot to contain the root node. +** The first two arguments are a pointer to and the size of a segment b-tree +** node. The node may be a leaf or an internal node. ** -** If successful, SQLITE_OK is returned and output variable *piLast is -** set to the largest blockid written to the database (or zero if no -** blocks were written to the db). Otherwise, an SQLite error code is -** returned. +** This function creates a new node image in blob object *pNew by copying +** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) +** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. */ -static int fts3NodeWrite( - Fts3Table *p, /* Virtual table handle */ - SegmentNode *pTree, /* SegmentNode handle */ - int iHeight, /* Height of this node in tree */ - sqlite3_int64 iLeaf, /* Block id of first leaf node */ - sqlite3_int64 iFree, /* Block id of next free slot in %_segments */ - sqlite3_int64 *piLast, /* OUT: Block id of last entry written */ - char **paRoot, /* OUT: Data for root node */ - int *pnRoot /* OUT: Size of root node in bytes */ +static int fts3TruncateNode( + const char *aNode, /* Current node image */ + int nNode, /* Size of aNode in bytes */ + Blob *pNew, /* OUT: Write new node image here */ + const char *zTerm, /* Omit all terms smaller than this */ + int nTerm, /* Size of zTerm in bytes */ + sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ ){ - int rc = SQLITE_OK; + NodeReader reader; /* Reader object */ + Blob prev = {0, 0, 0}; /* Previous term written to new node */ + int rc = SQLITE_OK; /* Return code */ + int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ - if( !pTree->pParent ){ - /* Root node of the tree. */ - int nStart = fts3TreeFinishNode(pTree, iHeight, iLeaf); - *piLast = iFree-1; - *pnRoot = pTree->nData - nStart; - *paRoot = &pTree->aData[nStart]; - }else{ - SegmentNode *pIter; - sqlite3_int64 iNextFree = iFree; - sqlite3_int64 iNextLeaf = iLeaf; - for(pIter=pTree->pLeftmost; pIter && rc==SQLITE_OK; pIter=pIter->pRight){ - int nStart = fts3TreeFinishNode(pIter, iHeight, iNextLeaf); - int nWrite = pIter->nData - nStart; - - rc = fts3WriteSegment(p, iNextFree, &pIter->aData[nStart], nWrite); - iNextFree++; - iNextLeaf += (pIter->nEntry+1); - } - if( rc==SQLITE_OK ){ - assert( iNextLeaf==iFree ); - rc = fts3NodeWrite( - p, pTree->pParent, iHeight+1, iFree, iNextFree, piLast, paRoot, pnRoot - ); + /* Allocate required output space */ + blobGrowBuffer(pNew, nNode, &rc); + if( rc!=SQLITE_OK ) return rc; + pNew->n = 0; + + /* Populate new node buffer */ + for(rc = nodeReaderInit(&reader, aNode, nNode); + rc==SQLITE_OK && reader.aNode; + rc = nodeReaderNext(&reader) + ){ + if( pNew->n==0 ){ + int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); + if( res<0 || (bLeaf==0 && res==0) ) continue; + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; } + rc = fts3AppendToNode( + pNew, &prev, reader.term.a, reader.term.n, + reader.aDoclist, reader.nDoclist + ); + if( rc!=SQLITE_OK ) break; + } + if( pNew->n==0 ){ + fts3StartNode(pNew, (int)aNode[0], reader.iChild); + *piBlock = reader.iChild; } + assert( pNew->n<=pNew->nAlloc ); + nodeReaderRelease(&reader); + sqlite3_free(prev.a); return rc; } /* -** Free all memory allocations associated with the tree pTree. -*/ -static void fts3NodeFree(SegmentNode *pTree){ - if( pTree ){ - SegmentNode *p = pTree->pLeftmost; - fts3NodeFree(p->pParent); - while( p ){ - SegmentNode *pRight = p->pRight; - if( p->aData!=(char *)&p[1] ){ - sqlite3_free(p->aData); - } - assert( pRight==0 || p->zMalloc==0 ); - sqlite3_free(p->zMalloc); - sqlite3_free(p); - p = pRight; - } - } -} - -/* -** Add a term to the segment being constructed by the SegmentWriter object -** *ppWriter. When adding the first term to a segment, *ppWriter should -** be passed NULL. This function will allocate a new SegmentWriter object -** and return it via the input/output variable *ppWriter in this case. +** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute +** level iAbsLevel. This may involve deleting entries from the %_segments +** table, and modifying existing entries in both the %_segments and %_segdir +** tables. ** -** If successful, SQLITE_OK is returned. Otherwise, an SQLite error code. +** SQLITE_OK is returned if the segment is updated successfully. Or an +** SQLite error code otherwise. */ -static int fts3SegWriterAdd( - Fts3Table *p, /* Virtual table handle */ - SegmentWriter **ppWriter, /* IN/OUT: SegmentWriter handle */ - int isCopyTerm, /* True if buffer zTerm must be copied */ - const char *zTerm, /* Pointer to buffer containing term */ - int nTerm, /* Size of term in bytes */ - const char *aDoclist, /* Pointer to buffer containing doclist */ - int nDoclist /* Size of doclist in bytes */ +static int fts3TruncateSegment( + Fts3Table *p, /* FTS3 table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ + int iIdx, /* Index within level of segment to modify */ + const char *zTerm, /* Remove terms smaller than this */ + int nTerm /* Number of bytes in buffer zTerm */ ){ - int nPrefix; /* Size of term prefix in bytes */ - int nSuffix; /* Size of term suffix in bytes */ - int nReq; /* Number of bytes required on leaf page */ - int nData; - SegmentWriter *pWriter = *ppWriter; - - if( !pWriter ){ - int rc; - sqlite3_stmt *pStmt; - - /* Allocate the SegmentWriter structure */ - pWriter = (SegmentWriter *)sqlite3_malloc(sizeof(SegmentWriter)); - if( !pWriter ) return SQLITE_NOMEM; - memset(pWriter, 0, sizeof(SegmentWriter)); - *ppWriter = pWriter; - - /* Allocate a buffer in which to accumulate data */ - pWriter->aData = (char *)sqlite3_malloc(p->nNodeSize); - if( !pWriter->aData ) return SQLITE_NOMEM; - pWriter->nSize = p->nNodeSize; + int rc = SQLITE_OK; /* Return code */ + Blob root = {0,0,0}; /* New root page image */ + Blob block = {0,0,0}; /* Buffer used for any other block */ + sqlite3_int64 iBlock = 0; /* Block id */ + sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ + sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ + sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ - /* Find the next free blockid in the %_segments table */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - pWriter->iFree = sqlite3_column_int64(pStmt, 0); - pWriter->iFirst = pWriter->iFree; + rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); + if( rc==SQLITE_OK ){ + int rc2; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pFetch, 1, iAbsLevel); + sqlite3_bind_int(pFetch, 2, iIdx); + if( SQLITE_ROW==sqlite3_step(pFetch) ){ + const char *aRoot = sqlite3_column_blob(pFetch, 4); + int nRoot = sqlite3_column_bytes(pFetch, 4); + iOldStart = sqlite3_column_int64(pFetch, 1); + rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); } - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ) return rc; + rc2 = sqlite3_reset(pFetch); + if( rc==SQLITE_OK ) rc = rc2; } - nData = pWriter->nData; - - nPrefix = fts3PrefixCompress(pWriter->zTerm, pWriter->nTerm, zTerm, nTerm); - nSuffix = nTerm-nPrefix; - /* Figure out how many bytes are required by this new entry */ - nReq = sqlite3Fts3VarintLen(nPrefix) + /* varint containing prefix size */ - sqlite3Fts3VarintLen(nSuffix) + /* varint containing suffix size */ - nSuffix + /* Term suffix */ - sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ - nDoclist; /* Doclist data */ - - if( nData>0 && nData+nReq>p->nNodeSize ){ - int rc; - - /* The current leaf node is full. Write it out to the database. */ - rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, nData); - if( rc!=SQLITE_OK ) return rc; - p->nLeafAdd++; - - /* Add the current term to the interior node tree. The term added to - ** the interior tree must: - ** - ** a) be greater than the largest term on the leaf node just written - ** to the database (still available in pWriter->zTerm), and - ** - ** b) be less than or equal to the term about to be added to the new - ** leaf node (zTerm/nTerm). - ** - ** In other words, it must be the prefix of zTerm 1 byte longer than - ** the common prefix (if any) of zTerm and pWriter->zTerm. - */ - assert( nPrefix<nTerm ); - rc = fts3NodeAddTerm(p, &pWriter->pTree, isCopyTerm, zTerm, nPrefix+1); - if( rc!=SQLITE_OK ) return rc; - - nData = 0; - pWriter->nTerm = 0; + while( rc==SQLITE_OK && iBlock ){ + char *aBlock = 0; + int nBlock = 0; + iNewStart = iBlock; - nPrefix = 0; - nSuffix = nTerm; - nReq = 1 + /* varint containing prefix size */ - sqlite3Fts3VarintLen(nTerm) + /* varint containing suffix size */ - nTerm + /* Term suffix */ - sqlite3Fts3VarintLen(nDoclist) + /* Size of doclist */ - nDoclist; /* Doclist data */ + rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); + if( rc==SQLITE_OK ){ + rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); + } + if( rc==SQLITE_OK ){ + rc = fts3WriteSegment(p, iNewStart, block.a, block.n); + } + sqlite3_free(aBlock); } - /* Increase the total number of bytes written to account for the new entry. */ - pWriter->nLeafData += nReq; - - /* If the buffer currently allocated is too small for this entry, realloc - ** the buffer to make it large enough. - */ - if( nReq>pWriter->nSize ){ - char *aNew = sqlite3_realloc(pWriter->aData, nReq); - if( !aNew ) return SQLITE_NOMEM; - pWriter->aData = aNew; - pWriter->nSize = nReq; + /* Variable iNewStart now contains the first valid leaf node. */ + if( rc==SQLITE_OK && iNewStart ){ + sqlite3_stmt *pDel = 0; + rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pDel, 1, iOldStart); + sqlite3_bind_int64(pDel, 2, iNewStart-1); + sqlite3_step(pDel); + rc = sqlite3_reset(pDel); + } } - assert( nData+nReq<=pWriter->nSize ); - - /* Append the prefix-compressed term and doclist to the buffer. */ - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nPrefix); - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nSuffix); - memcpy(&pWriter->aData[nData], &zTerm[nPrefix], nSuffix); - nData += nSuffix; - nData += sqlite3Fts3PutVarint(&pWriter->aData[nData], nDoclist); - memcpy(&pWriter->aData[nData], aDoclist, nDoclist); - pWriter->nData = nData + nDoclist; - /* Save the current term so that it can be used to prefix-compress the next. - ** If the isCopyTerm parameter is true, then the buffer pointed to by - ** zTerm is transient, so take a copy of the term data. Otherwise, just - ** store a copy of the pointer. - */ - if( isCopyTerm ){ - if( nTerm>pWriter->nMalloc ){ - char *zNew = sqlite3_realloc(pWriter->zMalloc, nTerm*2); - if( !zNew ){ - return SQLITE_NOMEM; - } - pWriter->nMalloc = nTerm*2; - pWriter->zMalloc = zNew; - pWriter->zTerm = zNew; + if( rc==SQLITE_OK ){ + sqlite3_stmt *pChomp = 0; + rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pChomp, 1, iNewStart); + sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); + sqlite3_bind_int64(pChomp, 3, iAbsLevel); + sqlite3_bind_int(pChomp, 4, iIdx); + sqlite3_step(pChomp); + rc = sqlite3_reset(pChomp); } - assert( pWriter->zTerm==pWriter->zMalloc ); - memcpy(pWriter->zTerm, zTerm, nTerm); - }else{ - pWriter->zTerm = (char *)zTerm; } - pWriter->nTerm = nTerm; - return SQLITE_OK; + sqlite3_free(root.a); + sqlite3_free(block.a); + return rc; } /* -** Flush all data associated with the SegmentWriter object pWriter to the -** database. This function must be called after all terms have been added -** to the segment using fts3SegWriterAdd(). If successful, SQLITE_OK is -** returned. Otherwise, an SQLite error code. +** This function is called after an incrmental-merge operation has run to +** merge (or partially merge) two or more segments from absolute level +** iAbsLevel. +** +** Each input segment is either removed from the db completely (if all of +** its data was copied to the output segment by the incrmerge operation) +** or modified in place so that it no longer contains those entries that +** have been duplicated in the output segment. */ -static int fts3SegWriterFlush( - Fts3Table *p, /* Virtual table handle */ - SegmentWriter *pWriter, /* SegmentWriter to flush to the db */ - sqlite3_int64 iLevel, /* Value for 'level' column of %_segdir */ - int iIdx /* Value for 'idx' column of %_segdir */ +static int fts3IncrmergeChomp( + Fts3Table *p, /* FTS table handle */ + sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ + Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ + int *pnRem /* Number of segments not deleted */ ){ - int rc; /* Return code */ - if( pWriter->pTree ){ - sqlite3_int64 iLast = 0; /* Largest block id written to database */ - sqlite3_int64 iLastLeaf; /* Largest leaf block id written to db */ - char *zRoot = NULL; /* Pointer to buffer containing root node */ - int nRoot = 0; /* Size of buffer zRoot */ + int i; + int nRem = 0; + int rc = SQLITE_OK; - iLastLeaf = pWriter->iFree; - rc = fts3WriteSegment(p, pWriter->iFree++, pWriter->aData, pWriter->nData); - if( rc==SQLITE_OK ){ - rc = fts3NodeWrite(p, pWriter->pTree, 1, - pWriter->iFirst, pWriter->iFree, &iLast, &zRoot, &nRoot); + for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ + Fts3SegReader *pSeg = 0; + int j; + + /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding + ** somewhere in the pCsr->apSegment[] array. */ + for(j=0; ALWAYS(j<pCsr->nSegment); j++){ + pSeg = pCsr->apSegment[j]; + if( pSeg->iIdx==i ) break; } - if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir(p, iLevel, iIdx, - pWriter->iFirst, iLastLeaf, iLast, pWriter->nLeafData, zRoot, nRoot); + assert( j<pCsr->nSegment && pSeg->iIdx==i ); + + if( pSeg->aNode==0 ){ + /* Seg-reader is at EOF. Remove the entire input segment. */ + rc = fts3DeleteSegment(p, pSeg); + if( rc==SQLITE_OK ){ + rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); + } + *pnRem = 0; + }else{ + /* The incremental merge did not copy all the data from this + ** segment to the upper level. The segment is modified in place + ** so that it contains no keys smaller than zTerm/nTerm. */ + const char *zTerm = pSeg->zTerm; + int nTerm = pSeg->nTerm; + rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); + nRem++; } - }else{ - /* The entire tree fits on the root node. Write it to the segdir table. */ - rc = fts3WriteSegdir(p, iLevel, iIdx, - 0, 0, 0, pWriter->nLeafData, pWriter->aData, pWriter->nData); } - p->nLeafAdd++; + + if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ + rc = fts3RepackSegdirLevel(p, iAbsLevel); + } + + *pnRem = nRem; return rc; } /* -** Release all memory held by the SegmentWriter object passed as the -** first argument. +** Store an incr-merge hint in the database. */ -static void fts3SegWriterFree(SegmentWriter *pWriter){ - if( pWriter ){ - sqlite3_free(pWriter->aData); - sqlite3_free(pWriter->zMalloc); - fts3NodeFree(pWriter->pTree); - sqlite3_free(pWriter); +static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ + sqlite3_stmt *pReplace = 0; + int rc; /* Return code */ + + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); + if( rc==SQLITE_OK ){ + sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); + sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); + sqlite3_step(pReplace); + rc = sqlite3_reset(pReplace); } + + return rc; } /* -** The first value in the apVal[] array is assumed to contain an integer. -** This function tests if there exist any documents with docid values that -** are different from that integer. i.e. if deleting the document with docid -** pRowid would mean the FTS3 table were empty. +** Load an incr-merge hint from the database. The incr-merge hint, if one +** exists, is stored in the rowid==1 row of the %_stat table. ** -** If successful, *pisEmpty is set to true if the table is empty except for -** document pRowid, or false otherwise, and SQLITE_OK is returned. If an -** error occurs, an SQLite error code is returned. +** If successful, populate blob *pHint with the value read from the %_stat +** table and return SQLITE_OK. Otherwise, if an error occurs, return an +** SQLite error code. */ -static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){ - sqlite3_stmt *pStmt; +static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ + sqlite3_stmt *pSelect = 0; int rc; - if( p->zContentTbl ){ - /* If using the content=xxx option, assume the table is never empty */ - *pisEmpty = 0; - rc = SQLITE_OK; - }else{ - rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pisEmpty = sqlite3_column_int(pStmt, 0); + + pHint->n = 0; + rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); + if( SQLITE_ROW==sqlite3_step(pSelect) ){ + const char *aHint = sqlite3_column_blob(pSelect, 0); + int nHint = sqlite3_column_bytes(pSelect, 0); + if( aHint ){ + blobGrowBuffer(pHint, nHint, &rc); + if( rc==SQLITE_OK ){ + memcpy(pHint->a, aHint, nHint); + pHint->n = nHint; + } } - rc = sqlite3_reset(pStmt); } + rc2 = sqlite3_reset(pSelect); + if( rc==SQLITE_OK ) rc = rc2; } + return rc; } /* -** Set *pnMax to the largest segment level in the database for the index -** iIndex. -** -** Segment levels are stored in the 'level' column of the %_segdir table. +** If *pRc is not SQLITE_OK when this function is called, it is a no-op. +** Otherwise, append an entry to the hint stored in blob *pHint. Each entry +** consists of two varints, the absolute level number of the input segments +** and the number of input segments. ** -** Return SQLITE_OK if successful, or an SQLite error code if not. +** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, +** set *pRc to an SQLite error code before returning. */ -static int fts3SegmentMaxLevel( - Fts3Table *p, - int iLangid, - int iIndex, - sqlite3_int64 *pnMax +static void fts3IncrmergeHintPush( + Blob *pHint, /* Hint blob to append to */ + i64 iAbsLevel, /* First varint to store in hint */ + int nInput, /* Second varint to store in hint */ + int *pRc /* IN/OUT: Error code */ ){ - sqlite3_stmt *pStmt; - int rc; - assert( iIndex>=0 && iIndex<p->nIndex ); - - /* Set pStmt to the compiled version of: - ** - ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? - ** - ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). - */ - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int64(pStmt, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pStmt, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pnMax = sqlite3_column_int64(pStmt, 0); + blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); + if( *pRc==SQLITE_OK ){ + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); + pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); } - return sqlite3_reset(pStmt); } /* -** iAbsLevel is an absolute level that may be assumed to exist within -** the database. This function checks if it is the largest level number -** within its index. Assuming no error occurs, *pbMax is set to 1 if -** iAbsLevel is indeed the largest level, or 0 otherwise, and SQLITE_OK -** is returned. If an error occurs, an error code is returned and the -** final value of *pbMax is undefined. +** Read the last entry (most recently pushed) from the hint blob *pHint +** and then remove the entry. Write the two values read to *piAbsLevel and +** *pnInput before returning. +** +** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does +** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. */ -static int fts3SegmentIsMaxLevel(Fts3Table *p, i64 iAbsLevel, int *pbMax){ +static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ + const int nHint = pHint->n; + int i; - /* Set pStmt to the compiled version of: - ** - ** SELECT max(level) FROM %Q.'%q_segdir' WHERE level BETWEEN ? AND ? - ** - ** (1024 is actually the value of macro FTS3_SEGDIR_PREFIXLEVEL_STR). - */ - sqlite3_stmt *pStmt; - int rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR_MAX_LEVEL, &pStmt, 0); - if( rc!=SQLITE_OK ) return rc; - sqlite3_bind_int64(pStmt, 1, iAbsLevel+1); - sqlite3_bind_int64(pStmt, 2, - ((iAbsLevel/FTS3_SEGDIR_MAXLEVEL)+1) * FTS3_SEGDIR_MAXLEVEL - ); + i = pHint->n-2; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; + while( i>0 && (pHint->a[i-1] & 0x80) ) i--; - *pbMax = 0; - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *pbMax = sqlite3_column_type(pStmt, 0)==SQLITE_NULL; - } - return sqlite3_reset(pStmt); + pHint->n = i; + i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); + i += fts3GetVarint32(&pHint->a[i], pnInput); + if( i!=nHint ) return FTS_CORRUPT_VTAB; + + return SQLITE_OK; } + /* -** Delete all entries in the %_segments table associated with the segment -** opened with seg-reader pSeg. This function does not affect the contents -** of the %_segdir table. +** Attempt an incremental merge that writes nMerge leaf blocks. +** +** Incremental merges happen nMin segments at a time. The segments +** to be merged are the nMin oldest segments (the ones with the smallest +** values for the _segdir.idx field) in the highest level that contains +** at least nMin segments. Multiple merges might occur in an attempt to +** write the quota of nMerge leaf blocks. */ -static int fts3DeleteSegment( - Fts3Table *p, /* FTS table handle */ - Fts3SegReader *pSeg /* Segment to delete */ -){ - int rc = SQLITE_OK; /* Return code */ - if( pSeg->iStartBlock ){ - sqlite3_stmt *pDelete; /* SQL statement to delete rows */ - rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDelete, 0); +SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ + int rc; /* Return code */ + int nRem = nMerge; /* Number of leaf pages yet to be written */ + Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ + Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ + IncrmergeWriter *pWriter; /* Writer object */ + int nSeg = 0; /* Number of input segments */ + sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ + Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ + int bDirtyHint = 0; /* True if blob 'hint' has been modified */ + + /* Allocate space for the cursor, filter and writer objects */ + const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); + pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); + if( !pWriter ) return SQLITE_NOMEM; + pFilter = (Fts3SegFilter *)&pWriter[1]; + pCsr = (Fts3MultiSegReader *)&pFilter[1]; + + rc = fts3IncrmergeHintLoad(p, &hint); + while( rc==SQLITE_OK && nRem>0 ){ + const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; + sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ + int bUseHint = 0; /* True if attempting to append */ + int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ + + /* Search the %_segdir table for the absolute level with the smallest + ** relative level number that contains at least nMin segments, if any. + ** If one is found, set iAbsLevel to the absolute level number and + ** nSeg to nMin. If no level with at least nMin segments can be found, + ** set nSeg to -1. + */ + rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); + sqlite3_bind_int(pFindLevel, 1, nMin); + if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ + iAbsLevel = sqlite3_column_int64(pFindLevel, 0); + nSeg = nMin; + }else{ + nSeg = -1; + } + rc = sqlite3_reset(pFindLevel); + + /* If the hint read from the %_stat table is not empty, check if the + ** last entry in it specifies a relative level smaller than or equal + ** to the level identified by the block above (if any). If so, this + ** iteration of the loop will work on merging at the hinted level. + */ + if( rc==SQLITE_OK && hint.n ){ + int nHint = hint.n; + sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ + int nHintSeg = 0; /* Hint number of segments */ + + rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); + if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ + iAbsLevel = iHintAbsLevel; + nSeg = nHintSeg; + bUseHint = 1; + bDirtyHint = 1; + }else{ + /* This undoes the effect of the HintPop() above - so that no entry + ** is removed from the hint blob. */ + hint.n = nHint; + } + } + + /* If nSeg is less that zero, then there is no level with at least + ** nMin segments and no hint in the %_stat table. No work to do. + ** Exit early in this case. */ + if( nSeg<0 ) break; + + /* Open a cursor to iterate through the contents of the oldest nSeg + ** indexes of absolute level iAbsLevel. If this cursor is opened using + ** the 'hint' parameters, it is possible that there are less than nSeg + ** segments available in level iAbsLevel. In this case, no work is + ** done on iAbsLevel - fall through to the next iteration of the loop + ** to start work on some other level. */ + memset(pWriter, 0, nAlloc); + pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; + if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, pSeg->iStartBlock); - sqlite3_bind_int64(pDelete, 2, pSeg->iEndBlock); - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); + rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); + assert( bUseHint==1 || bUseHint==0 ); + if( iIdx==0 || (bUseHint && iIdx==1) ){ + int bIgnore = 0; + rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); + if( bIgnore ){ + pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; + } + } + } + + if( rc==SQLITE_OK ){ + rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); + } + if( SQLITE_OK==rc && pCsr->nSegment==nSeg + && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) + && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) + ){ + if( bUseHint && iIdx>0 ){ + const char *zKey = pCsr->zTerm; + int nKey = pCsr->nTerm; + rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); + }else{ + rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); + } + + if( rc==SQLITE_OK && pWriter->nLeafEst ){ + fts3LogMerge(nSeg, iAbsLevel); + do { + rc = fts3IncrmergeAppend(p, pWriter, pCsr); + if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); + if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; + }while( rc==SQLITE_ROW ); + + /* Update or delete the input segments */ + if( rc==SQLITE_OK ){ + nRem -= (1 + pWriter->nWork); + rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); + if( nSeg!=0 ){ + bDirtyHint = 1; + fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); + } + } + } + + if( nSeg!=0 ){ + pWriter->nLeafData = pWriter->nLeafData * -1; + } + fts3IncrmergeRelease(p, pWriter, &rc); + if( nSeg==0 && pWriter->bNoLeafData==0 ){ + fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + } } + + sqlite3Fts3SegReaderFinish(pCsr); + } + + /* Write the hint values into the %_stat table for the next incr-merger */ + if( bDirtyHint && rc==SQLITE_OK ){ + rc = fts3IncrmergeHintStore(p, &hint); } + + sqlite3_free(pWriter); + sqlite3_free(hint.a); return rc; } /* -** This function is used after merging multiple segments into a single large -** segment to delete the old, now redundant, segment b-trees. Specifically, -** it: -** -** 1) Deletes all %_segments entries for the segments associated with -** each of the SegReader objects in the array passed as the third -** argument, and +** Convert the text beginning at *pz into an integer and return +** its value. Advance *pz to point to the first character past +** the integer. +*/ +static int fts3Getint(const char **pz){ + const char *z = *pz; + int i = 0; + while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0'; + *pz = z; + return i; +} + +/* +** Process statements of the form: ** -** 2) deletes all %_segdir entries with level iLevel, or all %_segdir -** entries regardless of level if (iLevel<0). +** INSERT INTO table(table) VALUES('merge=A,B'); ** -** SQLITE_OK is returned if successful, otherwise an SQLite error code. +** A and B are integers that decode to be the number of leaf pages +** written for the merge, and the minimum number of segments on a level +** before it will be selected for a merge, respectively. */ -static int fts3DeleteSegdir( - Fts3Table *p, /* Virtual table handle */ - int iLangid, /* Language id */ - int iIndex, /* Index for p->aIndex */ - int iLevel, /* Level of %_segdir entries to delete */ - Fts3SegReader **apSegment, /* Array of SegReader objects */ - int nReader /* Size of array apSegment */ +static int fts3DoIncrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing "A,B" */ ){ - int rc = SQLITE_OK; /* Return Code */ - int i; /* Iterator variable */ - sqlite3_stmt *pDelete = 0; /* SQL statement to delete rows */ + int rc; + int nMin = (FTS3_MERGE_COUNT / 2); + int nMerge = 0; + const char *z = zParam; - for(i=0; rc==SQLITE_OK && i<nReader; i++){ - rc = fts3DeleteSegment(p, apSegment[i]); - } - if( rc!=SQLITE_OK ){ - return rc; + /* Read the first integer value */ + nMerge = fts3Getint(&z); + + /* If the first integer value is followed by a ',', read the second + ** integer value. */ + if( z[0]==',' && z[1]!='\0' ){ + z++; + nMin = fts3Getint(&z); } - assert( iLevel>=0 || iLevel==FTS3_SEGCURSOR_ALL ); - if( iLevel==FTS3_SEGCURSOR_ALL ){ - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_RANGE, &pDelete, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, 0)); - sqlite3_bind_int64(pDelete, 2, - getAbsoluteLevel(p, iLangid, iIndex, FTS3_SEGDIR_MAXLEVEL-1) - ); - } + if( z[0]!='\0' || nMin<2 ){ + rc = SQLITE_ERROR; }else{ - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_LEVEL, &pDelete, 0); + rc = SQLITE_OK; + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + } if( rc==SQLITE_OK ){ - sqlite3_bind_int64( - pDelete, 1, getAbsoluteLevel(p, iLangid, iIndex, iLevel) - ); + rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); } + sqlite3Fts3SegmentsClose(p); } - - if( rc==SQLITE_OK ){ - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); - } - return rc; } /* -** When this function is called, buffer *ppList (size *pnList bytes) contains -** a position list that may (or may not) feature multiple columns. This -** function adjusts the pointer *ppList and the length *pnList so that they -** identify the subset of the position list that corresponds to column iCol. +** Process statements of the form: ** -** If there are no entries in the input position list for column iCol, then -** *pnList is set to zero before returning. +** INSERT INTO table(table) VALUES('automerge=X'); ** -** If parameter bZero is non-zero, then any part of the input list following -** the end of the output list is zeroed before returning. +** where X is an integer. X==0 means to turn automerge off. X!=0 means +** turn it on. The setting is persistent. */ -static void fts3ColumnFilter( - int iCol, /* Column to filter on */ - int bZero, /* Zero out anything following *ppList */ - char **ppList, /* IN/OUT: Pointer to position list */ - int *pnList /* IN/OUT: Size of buffer *ppList in bytes */ +static int fts3DoAutoincrmerge( + Fts3Table *p, /* FTS3 table handle */ + const char *zParam /* Nul-terminated string containing boolean */ ){ - char *pList = *ppList; - int nList = *pnList; - char *pEnd = &pList[nList]; - int iCurrent = 0; - char *p = pList; - - assert( iCol>=0 ); - while( 1 ){ - char c = 0; - while( p<pEnd && (c | *p)&0xFE ) c = *p++ & 0x80; - - if( iCol==iCurrent ){ - nList = (int)(p - pList); - break; - } - - nList -= (int)(p - pList); - pList = p; - if( nList==0 ){ - break; - } - p = &pList[1]; - p += fts3GetVarint32(p, &iCurrent); + int rc = SQLITE_OK; + sqlite3_stmt *pStmt = 0; + p->nAutoincrmerge = fts3Getint(&zParam); + if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ + p->nAutoincrmerge = 8; } - - if( bZero && &pList[nList]!=pEnd ){ - memset(&pList[nList], 0, pEnd - &pList[nList]); + if( !p->bHasStat ){ + assert( p->bFts4==0 ); + sqlite3Fts3CreateStatTable(&rc, p); + if( rc ) return rc; } - *ppList = pList; - *pnList = nList; + rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); + if( rc ) return rc; + sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); + sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); + sqlite3_step(pStmt); + rc = sqlite3_reset(pStmt); + return rc; } /* -** Cache data in the Fts3MultiSegReader.aBuffer[] buffer (overwriting any -** existing data). Grow the buffer if required. -** -** If successful, return SQLITE_OK. Otherwise, if an OOM error is encountered -** trying to resize the buffer, return SQLITE_NOMEM. +** Return a 64-bit checksum for the FTS index entry specified by the +** arguments to this function. */ -static int fts3MsrBufferData( - Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ - char *pList, - int nList +static u64 fts3ChecksumEntry( + const char *zTerm, /* Pointer to buffer containing term */ + int nTerm, /* Size of zTerm in bytes */ + int iLangid, /* Language id for current row */ + int iIndex, /* Index (0..Fts3Table.nIndex-1) */ + i64 iDocid, /* Docid for current row. */ + int iCol, /* Column number */ + int iPos /* Position */ ){ - if( nList>pMsr->nBuffer ){ - char *pNew; - pMsr->nBuffer = nList*2; - pNew = (char *)sqlite3_realloc(pMsr->aBuffer, pMsr->nBuffer); - if( !pNew ) return SQLITE_NOMEM; - pMsr->aBuffer = pNew; - } + int i; + u64 ret = (u64)iDocid; - memcpy(pMsr->aBuffer, pList, nList); - return SQLITE_OK; + ret += (ret<<3) + iLangid; + ret += (ret<<3) + iIndex; + ret += (ret<<3) + iCol; + ret += (ret<<3) + iPos; + for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i]; + + return ret; } -SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pMsr, /* Multi-segment-reader handle */ - sqlite3_int64 *piDocid, /* OUT: Docid value */ - char **paPoslist, /* OUT: Pointer to position list */ - int *pnPoslist /* OUT: Size of position list in bytes */ +/* +** Return a checksum of all entries in the FTS index that correspond to +** language id iLangid. The checksum is calculated by XORing the checksums +** of each individual entry (see fts3ChecksumEntry()) together. +** +** If successful, the checksum value is returned and *pRc set to SQLITE_OK. +** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The +** return value is undefined in this case. +*/ +static u64 fts3ChecksumIndex( + Fts3Table *p, /* FTS3 table handle */ + int iLangid, /* Language id to return cksum for */ + int iIndex, /* Index to cksum (0..p->nIndex-1) */ + int *pRc /* OUT: Return code */ ){ - int nMerge = pMsr->nAdvance; - Fts3SegReader **apSegment = pMsr->apSegment; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp - ); - - if( nMerge==0 ){ - *paPoslist = 0; - return SQLITE_OK; - } + Fts3SegFilter filter; + Fts3MultiSegReader csr; + int rc; + u64 cksum = 0; - while( 1 ){ - Fts3SegReader *pSeg; - pSeg = pMsr->apSegment[0]; + assert( *pRc==SQLITE_OK ); - if( pSeg->pOffsetList==0 ){ - *paPoslist = 0; - break; - }else{ - int rc; - char *pList; - int nList; - int j; - sqlite3_int64 iDocid = apSegment[0]->iDocid; + memset(&filter, 0, sizeof(filter)); + memset(&csr, 0, sizeof(csr)); + filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; + filter.flags |= FTS3_SEGMENT_SCAN; - rc = fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); - j = 1; - while( rc==SQLITE_OK - && j<nMerge - && apSegment[j]->pOffsetList - && apSegment[j]->iDocid==iDocid - ){ - rc = fts3SegReaderNextDocid(p, apSegment[j], 0, 0); - j++; - } - if( rc!=SQLITE_OK ) return rc; - fts3SegReaderSort(pMsr->apSegment, nMerge, j, xCmp); + rc = sqlite3Fts3SegReaderCursor( + p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr + ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + } - if( nList>0 && fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pMsr, pList, nList+1); - if( rc!=SQLITE_OK ) return rc; - assert( (pMsr->aBuffer[nList] & 0xFE)==0x00 ); - pList = pMsr->aBuffer; - } + if( rc==SQLITE_OK ){ + while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ + char *pCsr = csr.aDoclist; + char *pEnd = &pCsr[csr.nDoclist]; - if( pMsr->iColFilter>=0 ){ - fts3ColumnFilter(pMsr->iColFilter, 1, &pList, &nList); - } + i64 iDocid = 0; + i64 iCol = 0; + i64 iPos = 0; - if( nList>0 ){ - *paPoslist = pList; - *piDocid = iDocid; - *pnPoslist = nList; - break; + pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); + while( pCsr<pEnd ){ + i64 iVal = 0; + pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); + if( pCsr<pEnd ){ + if( iVal==0 || iVal==1 ){ + iCol = 0; + iPos = 0; + if( iVal ){ + pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); + }else{ + pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); + iDocid += iVal; + } + }else{ + iPos += (iVal - 2); + cksum = cksum ^ fts3ChecksumEntry( + csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, + (int)iCol, (int)iPos + ); + } + } } } } + sqlite3Fts3SegReaderFinish(&csr); - return SQLITE_OK; + *pRc = rc; + return cksum; } -static int fts3SegReaderStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - const char *zTerm, /* Term searched for (or NULL) */ - int nTerm /* Length of zTerm in bytes */ -){ - int i; - int nSeg = pCsr->nSegment; - - /* If the Fts3SegFilter defines a specific term (or term prefix) to search - ** for, then advance each segment iterator until it points to a term of - ** equal or greater value than the specified term. This prevents many - ** unnecessary merge/sort operations for the case where single segment - ** b-tree leaf nodes contain more than one term. - */ - for(i=0; pCsr->bRestart==0 && i<pCsr->nSegment; i++){ - int res = 0; - Fts3SegReader *pSeg = pCsr->apSegment[i]; - do { - int rc = fts3SegReaderNext(p, pSeg, 0); - if( rc!=SQLITE_OK ) return rc; - }while( zTerm && (res = fts3SegReaderTermCmp(pSeg, zTerm, nTerm))<0 ); +/* +** Check if the contents of the FTS index match the current contents of the +** content table. If no error occurs and the contents do match, set *pbOk +** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk +** to false before returning. +** +** If an error occurs (e.g. an OOM or IO error), return an SQLite error +** code. The final value of *pbOk is undefined in this case. +*/ +static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ + int rc = SQLITE_OK; /* Return code */ + u64 cksum1 = 0; /* Checksum based on FTS index contents */ + u64 cksum2 = 0; /* Checksum based on %_content contents */ + sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ - if( pSeg->bLookup && res!=0 ){ - fts3SegReaderSetEof(pSeg); + /* This block calculates the checksum according to the FTS index. */ + rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); + if( rc==SQLITE_OK ){ + int rc2; + sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); + sqlite3_bind_int(pAllLangid, 2, p->nIndex); + while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ + int iLangid = sqlite3_column_int(pAllLangid, 0); + int i; + for(i=0; i<p->nIndex; i++){ + cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); + } } + rc2 = sqlite3_reset(pAllLangid); + if( rc==SQLITE_OK ) rc = rc2; } - fts3SegReaderSort(pCsr->apSegment, nSeg, nSeg, fts3SegReaderCmp); - - return SQLITE_OK; -} -SQLITE_PRIVATE int sqlite3Fts3SegReaderStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - Fts3SegFilter *pFilter /* Restrictions on range of iteration */ -){ - pCsr->pFilter = pFilter; - return fts3SegReaderStart(p, pCsr, pFilter->zTerm, pFilter->nTerm); -} + /* This block calculates the checksum according to the %_content table */ + if( rc==SQLITE_OK ){ + sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; + sqlite3_stmt *pStmt = 0; + char *zSql; + + zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); + sqlite3_free(zSql); + } -SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr, /* Cursor object */ - int iCol, /* Column to match on. */ - const char *zTerm, /* Term to iterate through a doclist for */ - int nTerm /* Number of bytes in zTerm */ -){ - int i; - int rc; - int nSegment = pCsr->nSegment; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp - ); + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + i64 iDocid = sqlite3_column_int64(pStmt, 0); + int iLang = langidFromSelect(p, pStmt); + int iCol; - assert( pCsr->pFilter==0 ); - assert( zTerm && nTerm>0 ); + for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ + if( p->abNotindexed[iCol]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); + int nText = sqlite3_column_bytes(pStmt, iCol+1); + sqlite3_tokenizer_cursor *pT = 0; - /* Advance each segment iterator until it points to the term zTerm/nTerm. */ - rc = fts3SegReaderStart(p, pCsr, zTerm, nTerm); - if( rc!=SQLITE_OK ) return rc; + rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ - /* Determine how many of the segments actually point to zTerm/nTerm. */ - for(i=0; i<nSegment; i++){ - Fts3SegReader *pSeg = pCsr->apSegment[i]; - if( !pSeg->aNode || fts3SegReaderTermCmp(pSeg, zTerm, nTerm) ){ - break; + rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); + if( rc==SQLITE_OK ){ + int i; + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, nToken, iLang, 0, iDocid, iCol, iPos + ); + for(i=1; i<p->nIndex; i++){ + if( p->aIndex[i].nPrefix<=nToken ){ + cksum2 = cksum2 ^ fts3ChecksumEntry( + zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos + ); + } + } + } + } + if( pT ) pModule->xClose(pT); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + } + } } - } - pCsr->nAdvance = i; - /* Advance each of the segments to point to the first docid. */ - for(i=0; i<pCsr->nAdvance; i++){ - rc = fts3SegReaderFirstDocid(p, pCsr->apSegment[i]); - if( rc!=SQLITE_OK ) return rc; + sqlite3_finalize(pStmt); } - fts3SegReaderSort(pCsr->apSegment, i, i, xCmp); - assert( iCol<0 || iCol<p->nColumn ); - pCsr->iColFilter = iCol; - - return SQLITE_OK; + *pbOk = (cksum1==cksum2); + return rc; } /* -** This function is called on a MultiSegReader that has been started using -** sqlite3Fts3MsrIncrStart(). One or more calls to MsrIncrNext() may also -** have been made. Calling this function puts the MultiSegReader in such -** a state that if the next two calls are: +** Run the integrity-check. If no error occurs and the current contents of +** the FTS index are correct, return SQLITE_OK. Or, if the contents of the +** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. ** -** sqlite3Fts3SegReaderStart() -** sqlite3Fts3SegReaderStep() +** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite +** error code. ** -** then the entire doclist for the term is available in -** MultiSegReader.aDoclist/nDoclist. +** The integrity-check works as follows. For each token and indexed token +** prefix in the document set, a 64-bit checksum is calculated (by code +** in fts3ChecksumEntry()) based on the following: +** +** + The index number (0 for the main index, 1 for the first prefix +** index etc.), +** + The token (or token prefix) text itself, +** + The language-id of the row it appears in, +** + The docid of the row it appears in, +** + The column it appears in, and +** + The tokens position within that column. +** +** The checksums for all entries in the index are XORed together to create +** a single checksum for the entire index. +** +** The integrity-check code calculates the same checksum in two ways: +** +** 1. By scanning the contents of the FTS index, and +** 2. By scanning and tokenizing the content table. +** +** If the two checksums are identical, the integrity-check is deemed to have +** passed. */ -SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr){ - int i; /* Used to iterate through segment-readers */ +static int fts3DoIntegrityCheck( + Fts3Table *p /* FTS3 table handle */ +){ + int rc; + int bOk = 0; + rc = fts3IntegrityCheck(p, &bOk); + if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; + return rc; +} - assert( pCsr->zTerm==0 ); - assert( pCsr->nTerm==0 ); - assert( pCsr->aDoclist==0 ); - assert( pCsr->nDoclist==0 ); +/* +** Handle a 'special' INSERT of the form: +** +** "INSERT INTO tbl(tbl) VALUES(<expr>)" +** +** Argument pVal contains the result of <expr>. Currently the only +** meaningful value to insert is the text 'optimize'. +*/ +static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ + int rc; /* Return Code */ + const char *zVal = (const char *)sqlite3_value_text(pVal); + int nVal = sqlite3_value_bytes(pVal); - pCsr->nAdvance = 0; - pCsr->bRestart = 1; - for(i=0; i<pCsr->nSegment; i++){ - pCsr->apSegment[i]->pOffsetList = 0; - pCsr->apSegment[i]->nOffsetList = 0; - pCsr->apSegment[i]->iDocid = 0; + if( !zVal ){ + return SQLITE_NOMEM; + }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ + rc = fts3DoOptimize(p, 0); + }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ + rc = fts3DoRebuild(p); + }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ + rc = fts3DoIntegrityCheck(p); + }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ + rc = fts3DoIncrmerge(p, &zVal[6]); + }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ + rc = fts3DoAutoincrmerge(p, &zVal[10]); +#ifdef SQLITE_TEST + }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ + p->nNodeSize = atoi(&zVal[9]); + rc = SQLITE_OK; + }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ + p->nMaxPendingData = atoi(&zVal[11]); + rc = SQLITE_OK; + }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ + p->bNoIncrDoclist = atoi(&zVal[21]); + rc = SQLITE_OK; +#endif + }else{ + rc = SQLITE_ERROR; } - return SQLITE_OK; + return rc; } +#ifndef SQLITE_DISABLE_FTS4_DEFERRED +/* +** Delete all cached deferred doclists. Deferred doclists are cached +** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ + fts3PendingListDelete(pDef->pList); + pDef->pList = 0; + } +} -SQLITE_PRIVATE int sqlite3Fts3SegReaderStep( - Fts3Table *p, /* Virtual table handle */ - Fts3MultiSegReader *pCsr /* Cursor object */ -){ - int rc = SQLITE_OK; - - int isIgnoreEmpty = (pCsr->pFilter->flags & FTS3_SEGMENT_IGNORE_EMPTY); - int isRequirePos = (pCsr->pFilter->flags & FTS3_SEGMENT_REQUIRE_POS); - int isColFilter = (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER); - int isPrefix = (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX); - int isScan = (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN); - int isFirst = (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST); - - Fts3SegReader **apSegment = pCsr->apSegment; - int nSegment = pCsr->nSegment; - Fts3SegFilter *pFilter = pCsr->pFilter; - int (*xCmp)(Fts3SegReader *, Fts3SegReader *) = ( - p->bDescIdx ? fts3SegReaderDoclistCmpRev : fts3SegReaderDoclistCmp - ); - - if( pCsr->nSegment==0 ) return SQLITE_OK; - - do { - int nMerge; - int i; - - /* Advance the first pCsr->nAdvance entries in the apSegment[] array - ** forward. Then sort the list in order of current term again. - */ - for(i=0; i<pCsr->nAdvance; i++){ - Fts3SegReader *pSeg = apSegment[i]; - if( pSeg->bLookup ){ - fts3SegReaderSetEof(pSeg); - }else{ - rc = fts3SegReaderNext(p, pSeg, 0); - } - if( rc!=SQLITE_OK ) return rc; - } - fts3SegReaderSort(apSegment, nSegment, pCsr->nAdvance, fts3SegReaderCmp); - pCsr->nAdvance = 0; - - /* If all the seg-readers are at EOF, we're finished. return SQLITE_OK. */ - assert( rc==SQLITE_OK ); - if( apSegment[0]->aNode==0 ) break; - - pCsr->nTerm = apSegment[0]->nTerm; - pCsr->zTerm = apSegment[0]->zTerm; - - /* If this is a prefix-search, and if the term that apSegment[0] points - ** to does not share a suffix with pFilter->zTerm/nTerm, then all - ** required callbacks have been made. In this case exit early. - ** - ** Similarly, if this is a search for an exact match, and the first term - ** of segment apSegment[0] is not a match, exit early. - */ - if( pFilter->zTerm && !isScan ){ - if( pCsr->nTerm<pFilter->nTerm - || (!isPrefix && pCsr->nTerm>pFilter->nTerm) - || memcmp(pCsr->zTerm, pFilter->zTerm, pFilter->nTerm) - ){ - break; - } - } - - nMerge = 1; - while( nMerge<nSegment - && apSegment[nMerge]->aNode - && apSegment[nMerge]->nTerm==pCsr->nTerm - && 0==memcmp(pCsr->zTerm, apSegment[nMerge]->zTerm, pCsr->nTerm) - ){ - nMerge++; - } - - assert( isIgnoreEmpty || (isRequirePos && !isColFilter) ); - if( nMerge==1 - && !isIgnoreEmpty - && !isFirst - && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0) - ){ - pCsr->nDoclist = apSegment[0]->nDoclist; - if( fts3SegReaderIsPending(apSegment[0]) ){ - rc = fts3MsrBufferData(pCsr, apSegment[0]->aDoclist, pCsr->nDoclist); - pCsr->aDoclist = pCsr->aBuffer; - }else{ - pCsr->aDoclist = apSegment[0]->aDoclist; - } - if( rc==SQLITE_OK ) rc = SQLITE_ROW; - }else{ - int nDoclist = 0; /* Size of doclist */ - sqlite3_int64 iPrev = 0; /* Previous docid stored in doclist */ - - /* The current term of the first nMerge entries in the array - ** of Fts3SegReader objects is the same. The doclists must be merged - ** and a single term returned with the merged doclist. - */ - for(i=0; i<nMerge; i++){ - fts3SegReaderFirstDocid(p, apSegment[i]); - } - fts3SegReaderSort(apSegment, nMerge, nMerge, xCmp); - while( apSegment[0]->pOffsetList ){ - int j; /* Number of segments that share a docid */ - char *pList = 0; - int nList = 0; - int nByte; - sqlite3_int64 iDocid = apSegment[0]->iDocid; - fts3SegReaderNextDocid(p, apSegment[0], &pList, &nList); - j = 1; - while( j<nMerge - && apSegment[j]->pOffsetList - && apSegment[j]->iDocid==iDocid - ){ - fts3SegReaderNextDocid(p, apSegment[j], 0, 0); - j++; - } - - if( isColFilter ){ - fts3ColumnFilter(pFilter->iCol, 0, &pList, &nList); - } - - if( !isIgnoreEmpty || nList>0 ){ +/* +** Free all entries in the pCsr->pDeffered list. Entries are added to +** this list using sqlite3Fts3DeferToken(). +*/ +SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ + Fts3DeferredToken *pDef; + Fts3DeferredToken *pNext; + for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ + pNext = pDef->pNext; + fts3PendingListDelete(pDef->pList); + sqlite3_free(pDef); + } + pCsr->pDeferred = 0; +} - /* Calculate the 'docid' delta value to write into the merged - ** doclist. */ - sqlite3_int64 iDelta; - if( p->bDescIdx && nDoclist>0 ){ - iDelta = iPrev - iDocid; - }else{ - iDelta = iDocid - iPrev; - } - assert( iDelta>0 || (nDoclist==0 && iDelta==iDocid) ); - assert( nDoclist>0 || iDelta==iDocid ); +/* +** Generate deferred-doclists for all tokens in the pCsr->pDeferred list +** based on the row that pCsr currently points to. +** +** A deferred-doclist is like any other doclist with position information +** included, except that it only contains entries for a single row of the +** table, not for all rows. +*/ +SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ + int rc = SQLITE_OK; /* Return code */ + if( pCsr->pDeferred ){ + int i; /* Used to iterate through table columns */ + sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ + Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ + + Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer *pT = p->pTokenizer; + sqlite3_tokenizer_module const *pModule = pT->pModule; + + assert( pCsr->isRequireSeek==0 ); + iDocid = sqlite3_column_int64(pCsr->pStmt, 0); + + for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ + if( p->abNotindexed[i]==0 ){ + const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); + sqlite3_tokenizer_cursor *pTC = 0; - nByte = sqlite3Fts3VarintLen(iDelta) + (isRequirePos?nList+1:0); - if( nDoclist+nByte>pCsr->nBuffer ){ - char *aNew; - pCsr->nBuffer = (nDoclist+nByte)*2; - aNew = sqlite3_realloc(pCsr->aBuffer, pCsr->nBuffer); - if( !aNew ){ - return SQLITE_NOMEM; - } - pCsr->aBuffer = aNew; - } + rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); + while( rc==SQLITE_OK ){ + char const *zToken; /* Buffer containing token */ + int nToken = 0; /* Number of bytes in token */ + int iDum1 = 0, iDum2 = 0; /* Dummy variables */ + int iPos = 0; /* Position of token in zText */ - if( isFirst ){ - char *a = &pCsr->aBuffer[nDoclist]; - int nWrite; - - nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a); - if( nWrite ){ - iPrev = iDocid; - nDoclist += nWrite; - } - }else{ - nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta); - iPrev = iDocid; - if( isRequirePos ){ - memcpy(&pCsr->aBuffer[nDoclist], pList, nList); - nDoclist += nList; - pCsr->aBuffer[nDoclist++] = '\0'; + rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + Fts3PhraseToken *pPT = pDef->pToken; + if( (pDef->iCol>=p->nColumn || pDef->iCol==i) + && (pPT->bFirst==0 || iPos==0) + && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) + && (0==memcmp(zToken, pPT->z, pPT->n)) + ){ + fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); } } } - - fts3SegReaderSort(apSegment, nMerge, j, xCmp); + if( pTC ) pModule->xClose(pTC); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; } - if( nDoclist>0 ){ - pCsr->aDoclist = pCsr->aBuffer; - pCsr->nDoclist = nDoclist; - rc = SQLITE_ROW; + } + + for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ + if( pDef->pList ){ + rc = fts3PendingListAppendVarint(&pDef->pList, 0); } } - pCsr->nAdvance = nMerge; - }while( rc==SQLITE_OK ); + } return rc; } - -SQLITE_PRIVATE void sqlite3Fts3SegReaderFinish( - Fts3MultiSegReader *pCsr /* Cursor object */ +SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( + Fts3DeferredToken *p, + char **ppData, + int *pnData ){ - if( pCsr ){ - int i; - for(i=0; i<pCsr->nSegment; i++){ - sqlite3Fts3SegReaderFree(pCsr->apSegment[i]); - } - sqlite3_free(pCsr->apSegment); - sqlite3_free(pCsr->aBuffer); + char *pRet; + int nSkip; + sqlite3_int64 dummy; - pCsr->nSegment = 0; - pCsr->apSegment = 0; - pCsr->aBuffer = 0; + *ppData = 0; + *pnData = 0; + + if( p->pList==0 ){ + return SQLITE_OK; } + + pRet = (char *)sqlite3_malloc(p->pList->nData); + if( !pRet ) return SQLITE_NOMEM; + + nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); + *pnData = p->pList->nData - nSkip; + *ppData = pRet; + + memcpy(pRet, &p->pList->aData[nSkip], *pnData); + return SQLITE_OK; } /* -** Decode the "end_block" field, selected by column iCol of the SELECT -** statement passed as the first argument. -** -** The "end_block" field may contain either an integer, or a text field -** containing the text representation of two non-negative integers separated -** by one or more space (0x20) characters. In the first case, set *piEndBlock -** to the integer value and *pnByte to zero before returning. In the second, -** set *piEndBlock to the first value and *pnByte to the second. +** Add an entry for token pToken to the pCsr->pDeferred list. */ -static void fts3ReadEndBlockField( - sqlite3_stmt *pStmt, - int iCol, - i64 *piEndBlock, - i64 *pnByte +SQLITE_PRIVATE int sqlite3Fts3DeferToken( + Fts3Cursor *pCsr, /* Fts3 table cursor */ + Fts3PhraseToken *pToken, /* Token to defer */ + int iCol /* Column that token must appear in (or -1) */ ){ - const unsigned char *zText = sqlite3_column_text(pStmt, iCol); - if( zText ){ - int i; - int iMul = 1; - i64 iVal = 0; - for(i=0; zText[i]>='0' && zText[i]<='9'; i++){ - iVal = iVal*10 + (zText[i] - '0'); - } - *piEndBlock = iVal; - while( zText[i]==' ' ) i++; - iVal = 0; - if( zText[i]=='-' ){ - i++; - iMul = -1; - } - for(/* no-op */; zText[i]>='0' && zText[i]<='9'; i++){ - iVal = iVal*10 + (zText[i] - '0'); - } - *pnByte = (iVal * (i64)iMul); + Fts3DeferredToken *pDeferred; + pDeferred = sqlite3_malloc(sizeof(*pDeferred)); + if( !pDeferred ){ + return SQLITE_NOMEM; } -} + memset(pDeferred, 0, sizeof(*pDeferred)); + pDeferred->pToken = pToken; + pDeferred->pNext = pCsr->pDeferred; + pDeferred->iCol = iCol; + pCsr->pDeferred = pDeferred; + + assert( pToken->pDeferred==0 ); + pToken->pDeferred = pDeferred; + return SQLITE_OK; +} +#endif /* -** A segment of size nByte bytes has just been written to absolute level -** iAbsLevel. Promote any segments that should be promoted as a result. +** SQLite value pRowid contains the rowid of a row that may or may not be +** present in the FTS3 table. If it is, delete it and adjust the contents +** of subsiduary data structures accordingly. */ -static int fts3PromoteSegments( - Fts3Table *p, /* FTS table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level just updated */ - sqlite3_int64 nByte /* Size of new segment at iAbsLevel */ +static int fts3DeleteByRowid( + Fts3Table *p, + sqlite3_value *pRowid, + int *pnChng, /* IN/OUT: Decrement if row is deleted */ + u32 *aSzDel ){ - int rc = SQLITE_OK; - sqlite3_stmt *pRange; - - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL_RANGE2, &pRange, 0); - - if( rc==SQLITE_OK ){ - int bOk = 0; - i64 iLast = (iAbsLevel/FTS3_SEGDIR_MAXLEVEL + 1) * FTS3_SEGDIR_MAXLEVEL - 1; - i64 nLimit = (nByte*3)/2; - - /* Loop through all entries in the %_segdir table corresponding to - ** segments in this index on levels greater than iAbsLevel. If there is - ** at least one such segment, and it is possible to determine that all - ** such segments are smaller than nLimit bytes in size, they will be - ** promoted to level iAbsLevel. */ - sqlite3_bind_int64(pRange, 1, iAbsLevel+1); - sqlite3_bind_int64(pRange, 2, iLast); - while( SQLITE_ROW==sqlite3_step(pRange) ){ - i64 nSize = 0, dummy; - fts3ReadEndBlockField(pRange, 2, &dummy, &nSize); - if( nSize<=0 || nSize>nLimit ){ - /* If nSize==0, then the %_segdir.end_block field does not not - ** contain a size value. This happens if it was written by an - ** old version of FTS. In this case it is not possible to determine - ** the size of the segment, and so segment promotion does not - ** take place. */ - bOk = 0; - break; - } - bOk = 1; - } - rc = sqlite3_reset(pRange); - - if( bOk ){ - int iIdx = 0; - sqlite3_stmt *pUpdate1 = 0; - sqlite3_stmt *pUpdate2 = 0; - - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL_IDX, &pUpdate1, 0); - } - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_UPDATE_LEVEL, &pUpdate2, 0); - } - - if( rc==SQLITE_OK ){ + int rc = SQLITE_OK; /* Return code */ + int bFound = 0; /* True if *pRowid really is in the table */ - /* Loop through all %_segdir entries for segments in this index with - ** levels equal to or greater than iAbsLevel. As each entry is visited, - ** updated it to set (level = -1) and (idx = N), where N is 0 for the - ** oldest segment in the range, 1 for the next oldest, and so on. - ** - ** In other words, move all segments being promoted to level -1, - ** setting the "idx" fields as appropriate to keep them in the same - ** order. The contents of level -1 (which is never used, except - ** transiently here), will be moved back to level iAbsLevel below. */ - sqlite3_bind_int64(pRange, 1, iAbsLevel); - while( SQLITE_ROW==sqlite3_step(pRange) ){ - sqlite3_bind_int(pUpdate1, 1, iIdx++); - sqlite3_bind_int(pUpdate1, 2, sqlite3_column_int(pRange, 0)); - sqlite3_bind_int(pUpdate1, 3, sqlite3_column_int(pRange, 1)); - sqlite3_step(pUpdate1); - rc = sqlite3_reset(pUpdate1); - if( rc!=SQLITE_OK ){ - sqlite3_reset(pRange); - break; - } + fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); + if( bFound && rc==SQLITE_OK ){ + int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ + rc = fts3IsEmpty(p, pRowid, &isEmpty); + if( rc==SQLITE_OK ){ + if( isEmpty ){ + /* Deleting this row means the whole table is empty. In this case + ** delete the contents of all three tables and throw away any + ** data in the pendingTerms hash table. */ + rc = fts3DeleteAll(p, 1); + *pnChng = 0; + memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); + }else{ + *pnChng = *pnChng - 1; + if( p->zContentTbl==0 ){ + fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); + } + if( p->bHasDocsize ){ + fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); } - } - if( rc==SQLITE_OK ){ - rc = sqlite3_reset(pRange); - } - - /* Move level -1 to level iAbsLevel */ - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pUpdate2, 1, iAbsLevel); - sqlite3_step(pUpdate2); - rc = sqlite3_reset(pUpdate2); } } } - return rc; } /* -** Merge all level iLevel segments in the database into a single -** iLevel+1 segment. Or, if iLevel<0, merge all segments into a -** single segment with a level equal to the numerically largest level -** currently present in the database. +** This function does the work for the xUpdate method of FTS3 virtual +** tables. The schema of the virtual table being: ** -** If this function is called with iLevel<0, but there is only one -** segment in the database, SQLITE_DONE is returned immediately. -** Otherwise, if successful, SQLITE_OK is returned. If an error occurs, -** an SQLite error code is returned. +** CREATE TABLE <table name>( +** <user columns>, +** <table name> HIDDEN, +** docid HIDDEN, +** <langid> HIDDEN +** ); +** +** */ -static int fts3SegmentMerge( - Fts3Table *p, - int iLangid, /* Language id to merge */ - int iIndex, /* Index in p->aIndex[] to merge */ - int iLevel /* Level to merge */ +SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( + sqlite3_vtab *pVtab, /* FTS3 vtab object */ + int nArg, /* Size of argument array */ + sqlite3_value **apVal, /* Array of arguments */ + sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ ){ - int rc; /* Return code */ - int iIdx = 0; /* Index of new segment */ - sqlite3_int64 iNewLevel = 0; /* Level/index to create new segment at */ - SegmentWriter *pWriter = 0; /* Used to write the new, merged, segment */ - Fts3SegFilter filter; /* Segment term filter condition */ - Fts3MultiSegReader csr; /* Cursor to iterate through level(s) */ - int bIgnoreEmpty = 0; /* True to ignore empty segments */ - i64 iMaxLevel = 0; /* Max level number for this index/langid */ + Fts3Table *p = (Fts3Table *)pVtab; + int rc = SQLITE_OK; /* Return Code */ + int isRemove = 0; /* True for an UPDATE or DELETE */ + u32 *aSzIns = 0; /* Sizes of inserted documents */ + u32 *aSzDel = 0; /* Sizes of deleted documents */ + int nChng = 0; /* Net change in number of documents */ + int bInsertDone = 0; - assert( iLevel==FTS3_SEGCURSOR_ALL - || iLevel==FTS3_SEGCURSOR_PENDING - || iLevel>=0 - ); - assert( iLevel<FTS3_SEGDIR_MAXLEVEL ); - assert( iIndex>=0 && iIndex<p->nIndex ); + /* At this point it must be known if the %_stat table exists or not. + ** So bHasStat may not be 2. */ + assert( p->bHasStat==0 || p->bHasStat==1 ); - rc = sqlite3Fts3SegReaderCursor(p, iLangid, iIndex, iLevel, 0, 0, 1, 0, &csr); - if( rc!=SQLITE_OK || csr.nSegment==0 ) goto finished; + assert( p->pSegments==0 ); + assert( + nArg==1 /* DELETE operations */ + || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ + ); - if( iLevel!=FTS3_SEGCURSOR_PENDING ){ - rc = fts3SegmentMaxLevel(p, iLangid, iIndex, &iMaxLevel); - if( rc!=SQLITE_OK ) goto finished; + /* Check for a "special" INSERT operation. One of the form: + ** + ** INSERT INTO xyz(xyz) VALUES('command'); + */ + if( nArg>1 + && sqlite3_value_type(apVal[0])==SQLITE_NULL + && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL + ){ + rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); + goto update_out; } - if( iLevel==FTS3_SEGCURSOR_ALL ){ - /* This call is to merge all segments in the database to a single - ** segment. The level of the new segment is equal to the numerically - ** greatest segment level currently present in the database for this - ** index. The idx of the new segment is always 0. */ - if( csr.nSegment==1 ){ - rc = SQLITE_DONE; - goto finished; - } - iNewLevel = iMaxLevel; - bIgnoreEmpty = 1; + if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ + rc = SQLITE_CONSTRAINT; + goto update_out; + } - }else{ - /* This call is to merge all segments at level iLevel. find the next - ** available segment index at level iLevel+1. The call to - ** fts3AllocateSegdirIdx() will merge the segments at level iLevel+1 to - ** a single iLevel+2 segment if necessary. */ - assert( FTS3_SEGCURSOR_PENDING==-1 ); - iNewLevel = getAbsoluteLevel(p, iLangid, iIndex, iLevel+1); - rc = fts3AllocateSegdirIdx(p, iLangid, iIndex, iLevel+1, &iIdx); - bIgnoreEmpty = (iLevel!=FTS3_SEGCURSOR_PENDING) && (iNewLevel>iMaxLevel); + /* Allocate space to hold the change in document sizes */ + aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); + if( aSzDel==0 ){ + rc = SQLITE_NOMEM; + goto update_out; } - if( rc!=SQLITE_OK ) goto finished; + aSzIns = &aSzDel[p->nColumn+1]; + memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); - assert( csr.nSegment>0 ); - assert( iNewLevel>=getAbsoluteLevel(p, iLangid, iIndex, 0) ); - assert( iNewLevel<getAbsoluteLevel(p, iLangid, iIndex,FTS3_SEGDIR_MAXLEVEL) ); + rc = fts3Writelock(p); + if( rc!=SQLITE_OK ) goto update_out; - memset(&filter, 0, sizeof(Fts3SegFilter)); - filter.flags = FTS3_SEGMENT_REQUIRE_POS; - filter.flags |= (bIgnoreEmpty ? FTS3_SEGMENT_IGNORE_EMPTY : 0); + /* If this is an INSERT operation, or an UPDATE that modifies the rowid + ** value, then this operation requires constraint handling. + ** + ** If the on-conflict mode is REPLACE, this means that the existing row + ** should be deleted from the database before inserting the new row. Or, + ** if the on-conflict mode is other than REPLACE, then this method must + ** detect the conflict and return SQLITE_CONSTRAINT before beginning to + ** modify the database file. + */ + if( nArg>1 && p->zContentTbl==0 ){ + /* Find the value object that holds the new rowid value. */ + sqlite3_value *pNewRowid = apVal[3+p->nColumn]; + if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ + pNewRowid = apVal[1]; + } - rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); - while( SQLITE_OK==rc ){ - rc = sqlite3Fts3SegReaderStep(p, &csr); - if( rc!=SQLITE_ROW ) break; - rc = fts3SegWriterAdd(p, &pWriter, 1, - csr.zTerm, csr.nTerm, csr.aDoclist, csr.nDoclist); + if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( + sqlite3_value_type(apVal[0])==SQLITE_NULL + || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) + )){ + /* The new rowid is not NULL (in this case the rowid will be + ** automatically assigned and there is no chance of a conflict), and + ** the statement is either an INSERT or an UPDATE that modifies the + ** rowid column. So if the conflict mode is REPLACE, then delete any + ** existing row with rowid=pNewRowid. + ** + ** Or, if the conflict mode is not REPLACE, insert the new record into + ** the %_content table. If we hit the duplicate rowid constraint (or any + ** other error) while doing so, return immediately. + ** + ** This branch may also run if pNewRowid contains a value that cannot + ** be losslessly converted to an integer. In this case, the eventual + ** call to fts3InsertData() (either just below or further on in this + ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is + ** invoked, it will delete zero rows (since no row will have + ** docid=$pNewRowid if $pNewRowid is not an integer value). + */ + if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ + rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); + }else{ + rc = fts3InsertData(p, apVal, pRowid); + bInsertDone = 1; + } + } + } + if( rc!=SQLITE_OK ){ + goto update_out; } - if( rc!=SQLITE_OK ) goto finished; - assert( pWriter || bIgnoreEmpty ); - if( iLevel!=FTS3_SEGCURSOR_PENDING ){ - rc = fts3DeleteSegdir( - p, iLangid, iIndex, iLevel, csr.apSegment, csr.nSegment - ); - if( rc!=SQLITE_OK ) goto finished; + /* If this is a DELETE or UPDATE operation, remove the old record. */ + if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ + assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); + rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); + isRemove = 1; } - if( pWriter ){ - rc = fts3SegWriterFlush(p, pWriter, iNewLevel, iIdx); - if( rc==SQLITE_OK ){ - if( iLevel==FTS3_SEGCURSOR_PENDING || iNewLevel<iMaxLevel ){ - rc = fts3PromoteSegments(p, iNewLevel, pWriter->nLeafData); + + /* If this is an INSERT or UPDATE operation, insert the new record. */ + if( nArg>1 && rc==SQLITE_OK ){ + int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); + if( bInsertDone==0 ){ + rc = fts3InsertData(p, apVal, pRowid); + if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; } } + if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ + rc = fts3PendingTermsDocid(p, iLangid, *pRowid); + } + if( rc==SQLITE_OK ){ + assert( p->iPrevDocid==*pRowid ); + rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); + } + if( p->bHasDocsize ){ + fts3InsertDocsize(&rc, p, aSzIns); + } + nChng++; } - finished: - fts3SegWriterFree(pWriter); - sqlite3Fts3SegReaderFinish(&csr); + if( p->bFts4 ){ + fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); + } + + update_out: + sqlite3_free(aSzDel); + sqlite3Fts3SegmentsClose(p); return rc; } - /* -** Flush the contents of pendingTerms to level 0 segments. +** Flush any data in the pending-terms hash table to disk. If successful, +** merge all segments in the database (including the new segment, if +** there was any data to flush) into a single segment. */ -SQLITE_PRIVATE int sqlite3Fts3PendingTermsFlush(Fts3Table *p){ - int rc = SQLITE_OK; - int i; - - for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ - rc = fts3SegmentMerge(p, p->iPrevLangid, i, FTS3_SEGCURSOR_PENDING); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - sqlite3Fts3PendingTermsClear(p); - - /* Determine the auto-incr-merge setting if unknown. If enabled, - ** estimate the number of leaf blocks of content to be written - */ - if( rc==SQLITE_OK && p->bHasStat - && p->nAutoincrmerge==0xff && p->nLeafAdd>0 - ){ - sqlite3_stmt *pStmt = 0; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); - rc = sqlite3_step(pStmt); - if( rc==SQLITE_ROW ){ - p->nAutoincrmerge = sqlite3_column_int(pStmt, 0); - if( p->nAutoincrmerge==1 ) p->nAutoincrmerge = 8; - }else if( rc==SQLITE_DONE ){ - p->nAutoincrmerge = 0; - } - rc = sqlite3_reset(pStmt); +SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ + int rc; + rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3DoOptimize(p, 1); + if( rc==SQLITE_OK || rc==SQLITE_DONE ){ + int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); + if( rc2!=SQLITE_OK ) rc = rc2; + }else{ + sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); + sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); } } + sqlite3Fts3SegmentsClose(p); return rc; } +#endif + +/************** End of fts3_write.c ******************************************/ +/************** Begin file fts3_snippet.c ************************************/ /* -** Encode N integers as varints into a blob. +** 2009 Oct 23 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** */ -static void fts3EncodeIntArray( - int N, /* The number of integers to encode */ - u32 *a, /* The integer values */ - char *zBuf, /* Write the BLOB here */ - int *pNBuf /* Write number of bytes if zBuf[] used here */ -){ - int i, j; - for(i=j=0; i<N; i++){ - j += sqlite3Fts3PutVarint(&zBuf[j], (sqlite3_int64)a[i]); - } - *pNBuf = j; -} + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include <string.h> */ +/* #include <assert.h> */ /* -** Decode a blob of varints into N integers +** Characters that may appear in the second argument to matchinfo(). */ -static void fts3DecodeIntArray( - int N, /* The number of integers to decode */ - u32 *a, /* Write the integer values */ - const char *zBuf, /* The BLOB containing the varints */ - int nBuf /* size of the BLOB */ -){ - int i, j; - UNUSED_PARAMETER(nBuf); - for(i=j=0; i<N; i++){ - sqlite3_int64 x; - j += sqlite3Fts3GetVarint(&zBuf[j], &x); - assert(j<=nBuf); - a[i] = (u32)(x & 0xffffffff); - } -} +#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ +#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ +#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ +#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ +#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ +#define FTS3_MATCHINFO_LCS 's' /* nCol values */ +#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ +#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ +#define FTS3_MATCHINFO_LHITS_BM 'b' /* nCol*nPhrase values */ /* -** Insert the sizes (in tokens) for each column of the document -** with docid equal to p->iPrevDocid. The sizes are encoded as -** a blob of varints. +** The default value for the second argument to matchinfo(). */ -static void fts3InsertDocsize( - int *pRC, /* Result code */ - Fts3Table *p, /* Table into which to insert */ - u32 *aSz /* Sizes of each column, in tokens */ -){ - char *pBlob; /* The BLOB encoding of the document size */ - int nBlob; /* Number of bytes in the BLOB */ - sqlite3_stmt *pStmt; /* Statement used to insert the encoding */ - int rc; /* Result code from subfunctions */ +#define FTS3_MATCHINFO_DEFAULT "pcx" - if( *pRC ) return; - pBlob = sqlite3_malloc( 10*p->nColumn ); - if( pBlob==0 ){ - *pRC = SQLITE_NOMEM; - return; - } - fts3EncodeIntArray(p->nColumn, aSz, pBlob, &nBlob); - rc = fts3SqlStmt(p, SQL_REPLACE_DOCSIZE, &pStmt, 0); - if( rc ){ - sqlite3_free(pBlob); - *pRC = rc; - return; - } - sqlite3_bind_int64(pStmt, 1, p->iPrevDocid); - sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, sqlite3_free); - sqlite3_step(pStmt); - *pRC = sqlite3_reset(pStmt); -} /* -** Record 0 of the %_stat table contains a blob consisting of N varints, -** where N is the number of user defined columns in the fts3 table plus -** two. If nCol is the number of user defined columns, then values of the -** varints are set as follows: -** -** Varint 0: Total number of rows in the table. -** -** Varint 1..nCol: For each column, the total number of tokens stored in -** the column for all rows of the table. -** -** Varint 1+nCol: The total size, in bytes, of all text values in all -** columns of all rows of the table. -** +** Used as an fts3ExprIterate() context when loading phrase doclists to +** Fts3Expr.aDoclist[]/nDoclist. */ -static void fts3UpdateDocTotals( - int *pRC, /* The result code */ - Fts3Table *p, /* Table being updated */ - u32 *aSzIns, /* Size increases */ - u32 *aSzDel, /* Size decreases */ - int nChng /* Change in the number of documents */ -){ - char *pBlob; /* Storage for BLOB written into %_stat */ - int nBlob; /* Size of BLOB written into %_stat */ - u32 *a; /* Array of integers that becomes the BLOB */ - sqlite3_stmt *pStmt; /* Statement for reading and writing */ - int i; /* Loop counter */ - int rc; /* Result code from subfunctions */ +typedef struct LoadDoclistCtx LoadDoclistCtx; +struct LoadDoclistCtx { + Fts3Cursor *pCsr; /* FTS3 Cursor */ + int nPhrase; /* Number of phrases seen so far */ + int nToken; /* Number of tokens seen so far */ +}; - const int nStat = p->nColumn+2; +/* +** The following types are used as part of the implementation of the +** fts3BestSnippet() routine. +*/ +typedef struct SnippetIter SnippetIter; +typedef struct SnippetPhrase SnippetPhrase; +typedef struct SnippetFragment SnippetFragment; - if( *pRC ) return; - a = sqlite3_malloc( (sizeof(u32)+10)*nStat ); - if( a==0 ){ - *pRC = SQLITE_NOMEM; - return; - } - pBlob = (char*)&a[nStat]; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pStmt, 0); - if( rc ){ - sqlite3_free(a); - *pRC = rc; - return; +struct SnippetIter { + Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ + int iCol; /* Extract snippet from this column */ + int nSnippet; /* Requested snippet length (in tokens) */ + int nPhrase; /* Number of phrases in query */ + SnippetPhrase *aPhrase; /* Array of size nPhrase */ + int iCurrent; /* First token of current snippet */ +}; + +struct SnippetPhrase { + int nToken; /* Number of tokens in phrase */ + char *pList; /* Pointer to start of phrase position list */ + int iHead; /* Next value in position list */ + char *pHead; /* Position list data following iHead */ + int iTail; /* Next value in trailing position list */ + char *pTail; /* Position list data following iTail */ +}; + +struct SnippetFragment { + int iCol; /* Column snippet is extracted from */ + int iPos; /* Index of first token in snippet */ + u64 covered; /* Mask of query phrases covered */ + u64 hlmask; /* Mask of snippet terms to highlight */ +}; + +/* +** This type is used as an fts3ExprIterate() context object while +** accumulating the data returned by the matchinfo() function. +*/ +typedef struct MatchInfo MatchInfo; +struct MatchInfo { + Fts3Cursor *pCursor; /* FTS3 Cursor */ + int nCol; /* Number of columns in table */ + int nPhrase; /* Number of matchable phrases in query */ + sqlite3_int64 nDoc; /* Number of docs in database */ + char flag; + u32 *aMatchinfo; /* Pre-allocated buffer */ +}; + +/* +** An instance of this structure is used to manage a pair of buffers, each +** (nElem * sizeof(u32)) bytes in size. See the MatchinfoBuffer code below +** for details. +*/ +struct MatchinfoBuffer { + u8 aRef[3]; + int nElem; + int bGlobal; /* Set if global data is loaded */ + char *zMatchinfo; + u32 aMatchinfo[1]; +}; + + +/* +** The snippet() and offsets() functions both return text values. An instance +** of the following structure is used to accumulate those values while the +** functions are running. See fts3StringAppend() for details. +*/ +typedef struct StrBuffer StrBuffer; +struct StrBuffer { + char *z; /* Pointer to buffer containing string */ + int n; /* Length of z in bytes (excl. nul-term) */ + int nAlloc; /* Allocated size of buffer z in bytes */ +}; + + +/************************************************************************* +** Start of MatchinfoBuffer code. +*/ + +/* +** Allocate a two-slot MatchinfoBuffer object. +*/ +static MatchinfoBuffer *fts3MIBufferNew(int nElem, const char *zMatchinfo){ + MatchinfoBuffer *pRet; + int nByte = sizeof(u32) * (2*nElem + 1) + sizeof(MatchinfoBuffer); + int nStr = (int)strlen(zMatchinfo); + + pRet = sqlite3_malloc(nByte + nStr+1); + if( pRet ){ + memset(pRet, 0, nByte); + pRet->aMatchinfo[0] = (u8*)(&pRet->aMatchinfo[1]) - (u8*)pRet; + pRet->aMatchinfo[1+nElem] = pRet->aMatchinfo[0] + sizeof(u32)*(nElem+1); + pRet->nElem = nElem; + pRet->zMatchinfo = ((char*)pRet) + nByte; + memcpy(pRet->zMatchinfo, zMatchinfo, nStr+1); + pRet->aRef[0] = 1; } - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - if( sqlite3_step(pStmt)==SQLITE_ROW ){ - fts3DecodeIntArray(nStat, a, - sqlite3_column_blob(pStmt, 0), - sqlite3_column_bytes(pStmt, 0)); + + return pRet; +} + +static void fts3MIBufferFree(void *p){ + MatchinfoBuffer *pBuf = (MatchinfoBuffer*)((u8*)p - ((u32*)p)[-1]); + + assert( (u32*)p==&pBuf->aMatchinfo[1] + || (u32*)p==&pBuf->aMatchinfo[pBuf->nElem+2] + ); + if( (u32*)p==&pBuf->aMatchinfo[1] ){ + pBuf->aRef[1] = 0; }else{ - memset(a, 0, sizeof(u32)*(nStat) ); + pBuf->aRef[2] = 0; } - rc = sqlite3_reset(pStmt); - if( rc!=SQLITE_OK ){ - sqlite3_free(a); - *pRC = rc; - return; + + if( pBuf->aRef[0]==0 && pBuf->aRef[1]==0 && pBuf->aRef[2]==0 ){ + sqlite3_free(pBuf); } - if( nChng<0 && a[0]<(u32)(-nChng) ){ - a[0] = 0; - }else{ - a[0] += nChng; +} + +static void (*fts3MIBufferAlloc(MatchinfoBuffer *p, u32 **paOut))(void*){ + void (*xRet)(void*) = 0; + u32 *aOut = 0; + + if( p->aRef[1]==0 ){ + p->aRef[1] = 1; + aOut = &p->aMatchinfo[1]; + xRet = fts3MIBufferFree; } - for(i=0; i<p->nColumn+1; i++){ - u32 x = a[i+1]; - if( x+aSzIns[i] < aSzDel[i] ){ - x = 0; - }else{ - x = x + aSzIns[i] - aSzDel[i]; + else if( p->aRef[2]==0 ){ + p->aRef[2] = 1; + aOut = &p->aMatchinfo[p->nElem+2]; + xRet = fts3MIBufferFree; + }else{ + aOut = (u32*)sqlite3_malloc(p->nElem * sizeof(u32)); + if( aOut ){ + xRet = sqlite3_free; + if( p->bGlobal ) memcpy(aOut, &p->aMatchinfo[1], p->nElem*sizeof(u32)); } - a[i+1] = x; - } - fts3EncodeIntArray(nStat, a, pBlob, &nBlob); - rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); - if( rc ){ - sqlite3_free(a); - *pRC = rc; - return; } - sqlite3_bind_int(pStmt, 1, FTS_STAT_DOCTOTAL); - sqlite3_bind_blob(pStmt, 2, pBlob, nBlob, SQLITE_STATIC); - sqlite3_step(pStmt); - *pRC = sqlite3_reset(pStmt); - sqlite3_free(a); + + *paOut = aOut; + return xRet; +} + +static void fts3MIBufferSetGlobal(MatchinfoBuffer *p){ + p->bGlobal = 1; + memcpy(&p->aMatchinfo[2+p->nElem], &p->aMatchinfo[1], p->nElem*sizeof(u32)); } /* -** Merge the entire database so that there is one segment for each -** iIndex/iLangid combination. +** Free a MatchinfoBuffer object allocated using fts3MIBufferNew() */ -static int fts3DoOptimize(Fts3Table *p, int bReturnDone){ - int bSeenDone = 0; - int rc; - sqlite3_stmt *pAllLangid = 0; - - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); - sqlite3_bind_int(pAllLangid, 2, p->nIndex); - while( sqlite3_step(pAllLangid)==SQLITE_ROW ){ - int i; - int iLangid = sqlite3_column_int(pAllLangid, 0); - for(i=0; rc==SQLITE_OK && i<p->nIndex; i++){ - rc = fts3SegmentMerge(p, iLangid, i, FTS3_SEGCURSOR_ALL); - if( rc==SQLITE_DONE ){ - bSeenDone = 1; - rc = SQLITE_OK; - } - } +SQLITE_PRIVATE void sqlite3Fts3MIBufferFree(MatchinfoBuffer *p){ + if( p ){ + assert( p->aRef[0]==1 ); + p->aRef[0] = 0; + if( p->aRef[0]==0 && p->aRef[1]==0 && p->aRef[2]==0 ){ + sqlite3_free(p); } - rc2 = sqlite3_reset(pAllLangid); - if( rc==SQLITE_OK ) rc = rc2; } +} - sqlite3Fts3SegmentsClose(p); - sqlite3Fts3PendingTermsClear(p); +/* +** End of MatchinfoBuffer code. +*************************************************************************/ - return (rc==SQLITE_OK && bReturnDone && bSeenDone) ? SQLITE_DONE : rc; -} /* -** This function is called when the user executes the following statement: +** This function is used to help iterate through a position-list. A position +** list is a list of unique integers, sorted from smallest to largest. Each +** element of the list is represented by an FTS3 varint that takes the value +** of the difference between the current element and the previous one plus +** two. For example, to store the position-list: ** -** INSERT INTO <tbl>(<tbl>) VALUES('rebuild'); +** 4 9 113 ** -** The entire FTS index is discarded and rebuilt. If the table is one -** created using the content=xxx option, then the new index is based on -** the current contents of the xxx table. Otherwise, it is rebuilt based -** on the contents of the %_content table. +** the three varints: +** +** 6 7 106 +** +** are encoded. +** +** When this function is called, *pp points to the start of an element of +** the list. *piPos contains the value of the previous entry in the list. +** After it returns, *piPos contains the value of the next element of the +** list and *pp is advanced to the following varint. */ -static int fts3DoRebuild(Fts3Table *p){ - int rc; /* Return Code */ - - rc = fts3DeleteAll(p, 0); - if( rc==SQLITE_OK ){ - u32 *aSz = 0; - u32 *aSzIns = 0; - u32 *aSzDel = 0; - sqlite3_stmt *pStmt = 0; - int nEntry = 0; - - /* Compose and prepare an SQL statement to loop through the content table */ - char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); - sqlite3_free(zSql); - } - - if( rc==SQLITE_OK ){ - int nByte = sizeof(u32) * (p->nColumn+1)*3; - aSz = (u32 *)sqlite3_malloc(nByte); - if( aSz==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(aSz, 0, nByte); - aSzIns = &aSz[p->nColumn+1]; - aSzDel = &aSzIns[p->nColumn+1]; - } - } - - while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ - int iCol; - int iLangid = langidFromSelect(p, pStmt); - rc = fts3PendingTermsDocid(p, iLangid, sqlite3_column_int64(pStmt, 0)); - memset(aSz, 0, sizeof(aSz[0]) * (p->nColumn+1)); - for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ - if( p->abNotindexed[iCol]==0 ){ - const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1); - rc = fts3PendingTermsAdd(p, iLangid, z, iCol, &aSz[iCol]); - aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1); - } - } - if( p->bHasDocsize ){ - fts3InsertDocsize(&rc, p, aSz); - } - if( rc!=SQLITE_OK ){ - sqlite3_finalize(pStmt); - pStmt = 0; - }else{ - nEntry++; - for(iCol=0; iCol<=p->nColumn; iCol++){ - aSzIns[iCol] += aSz[iCol]; - } - } - } - if( p->bFts4 ){ - fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry); - } - sqlite3_free(aSz); - - if( pStmt ){ - int rc2 = sqlite3_finalize(pStmt); - if( rc==SQLITE_OK ){ - rc = rc2; - } - } - } - - return rc; +static void fts3GetDeltaPosition(char **pp, int *piPos){ + int iVal; + *pp += fts3GetVarint32(*pp, &iVal); + *piPos += (iVal-2); } - /* -** This function opens a cursor used to read the input data for an -** incremental merge operation. Specifically, it opens a cursor to scan -** the oldest nSeg segments (idx=0 through idx=(nSeg-1)) in absolute -** level iAbsLevel. +** Helper function for fts3ExprIterate() (see below). */ -static int fts3IncrmergeCsr( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level to open */ - int nSeg, /* Number of segments to merge */ - Fts3MultiSegReader *pCsr /* Cursor object to populate */ +static int fts3ExprIterate2( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int *piPhrase, /* Pointer to phrase counter */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ ){ - int rc; /* Return Code */ - sqlite3_stmt *pStmt = 0; /* Statement used to read %_segdir entry */ - int nByte; /* Bytes allocated at pCsr->apSegment[] */ - - /* Allocate space for the Fts3MultiSegReader.aCsr[] array */ - memset(pCsr, 0, sizeof(*pCsr)); - nByte = sizeof(Fts3SegReader *) * nSeg; - pCsr->apSegment = (Fts3SegReader **)sqlite3_malloc(nByte); + int rc; /* Return code */ + int eType = pExpr->eType; /* Type of expression node pExpr */ - if( pCsr->apSegment==0 ){ - rc = SQLITE_NOMEM; - }else{ - memset(pCsr->apSegment, 0, nByte); - rc = fts3SqlStmt(p, SQL_SELECT_LEVEL, &pStmt, 0); - } - if( rc==SQLITE_OK ){ - int i; - int rc2; - sqlite3_bind_int64(pStmt, 1, iAbsLevel); - assert( pCsr->nSegment==0 ); - for(i=0; rc==SQLITE_OK && sqlite3_step(pStmt)==SQLITE_ROW && i<nSeg; i++){ - rc = sqlite3Fts3SegReaderNew(i, 0, - sqlite3_column_int64(pStmt, 1), /* segdir.start_block */ - sqlite3_column_int64(pStmt, 2), /* segdir.leaves_end_block */ - sqlite3_column_int64(pStmt, 3), /* segdir.end_block */ - sqlite3_column_blob(pStmt, 4), /* segdir.root */ - sqlite3_column_bytes(pStmt, 4), /* segdir.root */ - &pCsr->apSegment[i] - ); - pCsr->nSegment++; + if( eType!=FTSQUERY_PHRASE ){ + assert( pExpr->pLeft && pExpr->pRight ); + rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); + if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ + rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); } - rc2 = sqlite3_reset(pStmt); - if( rc==SQLITE_OK ) rc = rc2; + }else{ + rc = x(pExpr, *piPhrase, pCtx); + (*piPhrase)++; } - return rc; } -typedef struct IncrmergeWriter IncrmergeWriter; -typedef struct NodeWriter NodeWriter; -typedef struct Blob Blob; -typedef struct NodeReader NodeReader; - /* -** An instance of the following structure is used as a dynamic buffer -** to build up nodes or other blobs of data in. +** Iterate through all phrase nodes in an FTS3 query, except those that +** are part of a sub-tree that is the right-hand-side of a NOT operator. +** For each phrase node found, the supplied callback function is invoked. ** -** The function blobGrowBuffer() is used to extend the allocation. +** If the callback function returns anything other than SQLITE_OK, +** the iteration is abandoned and the error code returned immediately. +** Otherwise, SQLITE_OK is returned after a callback has been made for +** all eligible phrase nodes. */ -struct Blob { - char *a; /* Pointer to allocation */ - int n; /* Number of valid bytes of data in a[] */ - int nAlloc; /* Allocated size of a[] (nAlloc>=n) */ -}; +static int fts3ExprIterate( + Fts3Expr *pExpr, /* Expression to iterate phrases of */ + int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ + void *pCtx /* Second argument to pass to callback */ +){ + int iPhrase = 0; /* Variable used as the phrase counter */ + return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); +} -/* -** This structure is used to build up buffers containing segment b-tree -** nodes (blocks). -*/ -struct NodeWriter { - sqlite3_int64 iBlock; /* Current block id */ - Blob key; /* Last key written to the current block */ - Blob block; /* Current block image */ -}; /* -** An object of this type contains the state required to create or append -** to an appendable b-tree segment. -*/ -struct IncrmergeWriter { - int nLeafEst; /* Space allocated for leaf blocks */ - int nWork; /* Number of leaf pages flushed */ - sqlite3_int64 iAbsLevel; /* Absolute level of input segments */ - int iIdx; /* Index of *output* segment in iAbsLevel+1 */ - sqlite3_int64 iStart; /* Block number of first allocated block */ - sqlite3_int64 iEnd; /* Block number of last allocated block */ - sqlite3_int64 nLeafData; /* Bytes of leaf page data so far */ - u8 bNoLeafData; /* If true, store 0 for segment size */ - NodeWriter aNodeWriter[FTS_MAX_APPENDABLE_HEIGHT]; -}; +** This is an fts3ExprIterate() callback used while loading the doclists +** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also +** fts3ExprLoadDoclists(). +*/ +static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + int rc = SQLITE_OK; + Fts3Phrase *pPhrase = pExpr->pPhrase; + LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; + + UNUSED_PARAMETER(iPhrase); + + p->nPhrase++; + p->nToken += pPhrase->nToken; + + return rc; +} /* -** An object of the following type is used to read data from a single -** FTS segment node. See the following functions: +** Load the doclists for each phrase in the query associated with FTS3 cursor +** pCsr. ** -** nodeReaderInit() -** nodeReaderNext() -** nodeReaderRelease() +** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable +** phrases in the expression (all phrases except those directly or +** indirectly descended from the right-hand-side of a NOT operator). If +** pnToken is not NULL, then it is set to the number of tokens in all +** matchable phrases of the expression. */ -struct NodeReader { - const char *aNode; - int nNode; - int iOff; /* Current offset within aNode[] */ +static int fts3ExprLoadDoclists( + Fts3Cursor *pCsr, /* Fts3 cursor for current query */ + int *pnPhrase, /* OUT: Number of phrases in query */ + int *pnToken /* OUT: Number of tokens in query */ +){ + int rc; /* Return Code */ + LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ + sCtx.pCsr = pCsr; + rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); + if( pnPhrase ) *pnPhrase = sCtx.nPhrase; + if( pnToken ) *pnToken = sCtx.nToken; + return rc; +} - /* Output variables. Containing the current node entry. */ - sqlite3_int64 iChild; /* Pointer to child node */ - Blob term; /* Current term */ - const char *aDoclist; /* Pointer to doclist */ - int nDoclist; /* Size of doclist in bytes */ -}; +static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ + (*(int *)ctx)++; + pExpr->iPhrase = iPhrase; + return SQLITE_OK; +} +static int fts3ExprPhraseCount(Fts3Expr *pExpr){ + int nPhrase = 0; + (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); + return nPhrase; +} /* -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, if the allocation at pBlob->a is not already at least nMin -** bytes in size, extend (realloc) it to be so. -** -** If an OOM error occurs, set *pRc to SQLITE_NOMEM and leave pBlob->a -** unmodified. Otherwise, if the allocation succeeds, update pBlob->nAlloc -** to reflect the new size of the pBlob->a[] buffer. +** Advance the position list iterator specified by the first two +** arguments so that it points to the first element with a value greater +** than or equal to parameter iNext. */ -static void blobGrowBuffer(Blob *pBlob, int nMin, int *pRc){ - if( *pRc==SQLITE_OK && nMin>pBlob->nAlloc ){ - int nAlloc = nMin; - char *a = (char *)sqlite3_realloc(pBlob->a, nAlloc); - if( a ){ - pBlob->nAlloc = nAlloc; - pBlob->a = a; - }else{ - *pRc = SQLITE_NOMEM; +static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ + char *pIter = *ppIter; + if( pIter ){ + int iIter = *piIter; + + while( iIter<iNext ){ + if( 0==(*pIter & 0xFE) ){ + iIter = -1; + pIter = 0; + break; + } + fts3GetDeltaPosition(&pIter, &iIter); } + + *piIter = iIter; + *ppIter = pIter; } } /* -** Attempt to advance the node-reader object passed as the first argument to -** the next entry on the node. -** -** Return an error code if an error occurs (SQLITE_NOMEM is possible). -** Otherwise return SQLITE_OK. If there is no next entry on the node -** (e.g. because the current entry is the last) set NodeReader->aNode to -** NULL to indicate EOF. Otherwise, populate the NodeReader structure output -** variables for the new entry. +** Advance the snippet iterator to the next candidate snippet. */ -static int nodeReaderNext(NodeReader *p){ - int bFirst = (p->term.n==0); /* True for first term on the node */ - int nPrefix = 0; /* Bytes to copy from previous term */ - int nSuffix = 0; /* Bytes to append to the prefix */ - int rc = SQLITE_OK; /* Return code */ +static int fts3SnippetNextCandidate(SnippetIter *pIter){ + int i; /* Loop counter */ - assert( p->aNode ); - if( p->iChild && bFirst==0 ) p->iChild++; - if( p->iOff>=p->nNode ){ - /* EOF */ - p->aNode = 0; - }else{ - if( bFirst==0 ){ - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nPrefix); + if( pIter->iCurrent<0 ){ + /* The SnippetIter object has just been initialized. The first snippet + ** candidate always starts at offset 0 (even if this candidate has a + ** score of 0.0). + */ + pIter->iCurrent = 0; + + /* Advance the 'head' iterator of each phrase to the first offset that + ** is greater than or equal to (iNext+nSnippet). + */ + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); } - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &nSuffix); + }else{ + int iStart; + int iEnd = 0x7FFFFFFF; - blobGrowBuffer(&p->term, nPrefix+nSuffix, &rc); - if( rc==SQLITE_OK ){ - memcpy(&p->term.a[nPrefix], &p->aNode[p->iOff], nSuffix); - p->term.n = nPrefix+nSuffix; - p->iOff += nSuffix; - if( p->iChild==0 ){ - p->iOff += fts3GetVarint32(&p->aNode[p->iOff], &p->nDoclist); - p->aDoclist = &p->aNode[p->iOff]; - p->iOff += p->nDoclist; + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pHead && pPhrase->iHead<iEnd ){ + iEnd = pPhrase->iHead; } } - } + if( iEnd==0x7FFFFFFF ){ + return 1; + } - assert( p->iOff<=p->nNode ); + pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); + fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); + } + } - return rc; + return 0; } /* -** Release all dynamic resources held by node-reader object *p. +** Retrieve information about the current candidate snippet of snippet +** iterator pIter. */ -static void nodeReaderRelease(NodeReader *p){ - sqlite3_free(p->term.a); +static void fts3SnippetDetails( + SnippetIter *pIter, /* Snippet iterator */ + u64 mCovered, /* Bitmask of phrases already covered */ + int *piToken, /* OUT: First token of proposed snippet */ + int *piScore, /* OUT: "Score" for this snippet */ + u64 *pmCover, /* OUT: Bitmask of phrases covered */ + u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ +){ + int iStart = pIter->iCurrent; /* First token of snippet */ + int iScore = 0; /* Score of this snippet */ + int i; /* Loop counter */ + u64 mCover = 0; /* Mask of phrases covered by this snippet */ + u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ + + for(i=0; i<pIter->nPhrase; i++){ + SnippetPhrase *pPhrase = &pIter->aPhrase[i]; + if( pPhrase->pTail ){ + char *pCsr = pPhrase->pTail; + int iCsr = pPhrase->iTail; + + while( iCsr<(iStart+pIter->nSnippet) ){ + int j; + u64 mPhrase = (u64)1 << i; + u64 mPos = (u64)1 << (iCsr - iStart); + assert( iCsr>=iStart ); + if( (mCover|mCovered)&mPhrase ){ + iScore++; + }else{ + iScore += 1000; + } + mCover |= mPhrase; + + for(j=0; j<pPhrase->nToken; j++){ + mHighlight |= (mPos>>j); + } + + if( 0==(*pCsr & 0x0FE) ) break; + fts3GetDeltaPosition(&pCsr, &iCsr); + } + } + } + + /* Set the output variables before returning. */ + *piToken = iStart; + *piScore = iScore; + *pmCover = mCover; + *pmHighlight = mHighlight; } /* -** Initialize a node-reader object to read the node in buffer aNode/nNode. -** -** If successful, SQLITE_OK is returned and the NodeReader object set to -** point to the first entry on the node (if any). Otherwise, an SQLite -** error code is returned. +** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). +** Each invocation populates an element of the SnippetIter.aPhrase[] array. */ -static int nodeReaderInit(NodeReader *p, const char *aNode, int nNode){ - memset(p, 0, sizeof(NodeReader)); - p->aNode = aNode; - p->nNode = nNode; +static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ + SnippetIter *p = (SnippetIter *)ctx; + SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; + char *pCsr; + int rc; - /* Figure out if this is a leaf or an internal node. */ - if( p->aNode[0] ){ - /* An internal node. */ - p->iOff = 1 + sqlite3Fts3GetVarint(&p->aNode[1], &p->iChild); + pPhrase->nToken = pExpr->pPhrase->nToken; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); + assert( rc==SQLITE_OK || pCsr==0 ); + if( pCsr ){ + int iFirst = 0; + pPhrase->pList = pCsr; + fts3GetDeltaPosition(&pCsr, &iFirst); + assert( iFirst>=0 ); + pPhrase->pHead = pCsr; + pPhrase->pTail = pCsr; + pPhrase->iHead = iFirst; + pPhrase->iTail = iFirst; }else{ - p->iOff = 1; + assert( rc!=SQLITE_OK || ( + pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 + )); } - return nodeReaderNext(p); + return rc; } /* -** This function is called while writing an FTS segment each time a leaf o -** node is finished and written to disk. The key (zTerm/nTerm) is guaranteed -** to be greater than the largest key on the node just written, but smaller -** than or equal to the first key that will be written to the next leaf -** node. +** Select the fragment of text consisting of nFragment contiguous tokens +** from column iCol that represent the "best" snippet. The best snippet +** is the snippet with the highest score, where scores are calculated +** by adding: ** -** The block id of the leaf node just written to disk may be found in -** (pWriter->aNodeWriter[0].iBlock) when this function is called. +** (a) +1 point for each occurrence of a matchable phrase in the snippet. +** +** (b) +1000 points for the first occurrence of each matchable phrase in +** the snippet for which the corresponding mCovered bit is not set. +** +** The selected snippet parameters are stored in structure *pFragment before +** returning. The score of the selected snippet is stored in *piScore +** before returning. */ -static int fts3IncrmergePush( - Fts3Table *p, /* Fts3 table handle */ - IncrmergeWriter *pWriter, /* Writer object */ - const char *zTerm, /* Term to write to internal node */ - int nTerm /* Bytes at zTerm */ +static int fts3BestSnippet( + int nSnippet, /* Desired snippet length */ + Fts3Cursor *pCsr, /* Cursor to create snippet for */ + int iCol, /* Index of column to create snippet from */ + u64 mCovered, /* Mask of phrases already covered */ + u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ + SnippetFragment *pFragment, /* OUT: Best snippet found */ + int *piScore /* OUT: Score of snippet pFragment */ ){ - sqlite3_int64 iPtr = pWriter->aNodeWriter[0].iBlock; - int iLayer; - - assert( nTerm>0 ); - for(iLayer=1; ALWAYS(iLayer<FTS_MAX_APPENDABLE_HEIGHT); iLayer++){ - sqlite3_int64 iNextPtr = 0; - NodeWriter *pNode = &pWriter->aNodeWriter[iLayer]; - int rc = SQLITE_OK; - int nPrefix; - int nSuffix; - int nSpace; + int rc; /* Return Code */ + int nList; /* Number of phrases in expression */ + SnippetIter sIter; /* Iterates through snippet candidates */ + int nByte; /* Number of bytes of space to allocate */ + int iBestScore = -1; /* Best snippet score found so far */ + int i; /* Loop counter */ - /* Figure out how much space the key will consume if it is written to - ** the current node of layer iLayer. Due to the prefix compression, - ** the space required changes depending on which node the key is to - ** be added to. */ - nPrefix = fts3PrefixCompress(pNode->key.a, pNode->key.n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; - nSpace = sqlite3Fts3VarintLen(nPrefix); - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; + memset(&sIter, 0, sizeof(sIter)); - if( pNode->key.n==0 || (pNode->block.n + nSpace)<=p->nNodeSize ){ - /* If the current node of layer iLayer contains zero keys, or if adding - ** the key to it will not cause it to grow to larger than nNodeSize - ** bytes in size, write the key here. */ + /* Iterate through the phrases in the expression to count them. The same + ** callback makes sure the doclists are loaded for each phrase. + */ + rc = fts3ExprLoadDoclists(pCsr, &nList, 0); + if( rc!=SQLITE_OK ){ + return rc; + } - Blob *pBlk = &pNode->block; - if( pBlk->n==0 ){ - blobGrowBuffer(pBlk, p->nNodeSize, &rc); - if( rc==SQLITE_OK ){ - pBlk->a[0] = (char)iLayer; - pBlk->n = 1 + sqlite3Fts3PutVarint(&pBlk->a[1], iPtr); - } - } - blobGrowBuffer(pBlk, pBlk->n + nSpace, &rc); - blobGrowBuffer(&pNode->key, nTerm, &rc); + /* Now that it is known how many phrases there are, allocate and zero + ** the required space using malloc(). + */ + nByte = sizeof(SnippetPhrase) * nList; + sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); + if( !sIter.aPhrase ){ + return SQLITE_NOMEM; + } + memset(sIter.aPhrase, 0, nByte); - if( rc==SQLITE_OK ){ - if( pNode->key.n ){ - pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nPrefix); - } - pBlk->n += sqlite3Fts3PutVarint(&pBlk->a[pBlk->n], nSuffix); - memcpy(&pBlk->a[pBlk->n], &zTerm[nPrefix], nSuffix); - pBlk->n += nSuffix; + /* Initialize the contents of the SnippetIter object. Then iterate through + ** the set of phrases in the expression to populate the aPhrase[] array. + */ + sIter.pCsr = pCsr; + sIter.iCol = iCol; + sIter.nSnippet = nSnippet; + sIter.nPhrase = nList; + sIter.iCurrent = -1; + rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void*)&sIter); + if( rc==SQLITE_OK ){ - memcpy(pNode->key.a, zTerm, nTerm); - pNode->key.n = nTerm; + /* Set the *pmSeen output variable. */ + for(i=0; i<nList; i++){ + if( sIter.aPhrase[i].pHead ){ + *pmSeen |= (u64)1 << i; } - }else{ - /* Otherwise, flush the current node of layer iLayer to disk. - ** Then allocate a new, empty sibling node. The key will be written - ** into the parent of this node. */ - rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); - - assert( pNode->block.nAlloc>=p->nNodeSize ); - pNode->block.a[0] = (char)iLayer; - pNode->block.n = 1 + sqlite3Fts3PutVarint(&pNode->block.a[1], iPtr+1); + } - iNextPtr = pNode->iBlock; - pNode->iBlock++; - pNode->key.n = 0; + /* Loop through all candidate snippets. Store the best snippet in + ** *pFragment. Store its associated 'score' in iBestScore. + */ + pFragment->iCol = iCol; + while( !fts3SnippetNextCandidate(&sIter) ){ + int iPos; + int iScore; + u64 mCover; + u64 mHighlite; + fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); + assert( iScore>=0 ); + if( iScore>iBestScore ){ + pFragment->iPos = iPos; + pFragment->hlmask = mHighlite; + pFragment->covered = mCover; + iBestScore = iScore; + } } - if( rc!=SQLITE_OK || iNextPtr==0 ) return rc; - iPtr = iNextPtr; + *piScore = iBestScore; } - - assert( 0 ); - return 0; + sqlite3_free(sIter.aPhrase); + return rc; } + /* -** Append a term and (optionally) doclist to the FTS segment node currently -** stored in blob *pNode. The node need not contain any terms, but the -** header must be written before this function is called. -** -** A node header is a single 0x00 byte for a leaf node, or a height varint -** followed by the left-hand-child varint for an internal node. -** -** The term to be appended is passed via arguments zTerm/nTerm. For a -** leaf node, the doclist is passed as aDoclist/nDoclist. For an internal -** node, both aDoclist and nDoclist must be passed 0. -** -** If the size of the value in blob pPrev is zero, then this is the first -** term written to the node. Otherwise, pPrev contains a copy of the -** previous term. Before this function returns, it is updated to contain a -** copy of zTerm/nTerm. -** -** It is assumed that the buffer associated with pNode is already large -** enough to accommodate the new entry. The buffer associated with pPrev -** is extended by this function if requrired. +** Append a string to the string-buffer passed as the first argument. ** -** If an error (i.e. OOM condition) occurs, an SQLite error code is -** returned. Otherwise, SQLITE_OK. +** If nAppend is negative, then the length of the string zAppend is +** determined using strlen(). */ -static int fts3AppendToNode( - Blob *pNode, /* Current node image to append to */ - Blob *pPrev, /* Buffer containing previous term written */ - const char *zTerm, /* New term to write */ - int nTerm, /* Size of zTerm in bytes */ - const char *aDoclist, /* Doclist (or NULL) to write */ - int nDoclist /* Size of aDoclist in bytes */ +static int fts3StringAppend( + StrBuffer *pStr, /* Buffer to append to */ + const char *zAppend, /* Pointer to data to append to buffer */ + int nAppend /* Size of zAppend in bytes (or -1) */ ){ - int rc = SQLITE_OK; /* Return code */ - int bFirst = (pPrev->n==0); /* True if this is the first term written */ - int nPrefix; /* Size of term prefix in bytes */ - int nSuffix; /* Size of term suffix in bytes */ - - /* Node must have already been started. There must be a doclist for a - ** leaf node, and there must not be a doclist for an internal node. */ - assert( pNode->n>0 ); - assert( (pNode->a[0]=='\0')==(aDoclist!=0) ); - - blobGrowBuffer(pPrev, nTerm, &rc); - if( rc!=SQLITE_OK ) return rc; - - nPrefix = fts3PrefixCompress(pPrev->a, pPrev->n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; - memcpy(pPrev->a, zTerm, nTerm); - pPrev->n = nTerm; - - if( bFirst==0 ){ - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nPrefix); + if( nAppend<0 ){ + nAppend = (int)strlen(zAppend); } - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nSuffix); - memcpy(&pNode->a[pNode->n], &zTerm[nPrefix], nSuffix); - pNode->n += nSuffix; - if( aDoclist ){ - pNode->n += sqlite3Fts3PutVarint(&pNode->a[pNode->n], nDoclist); - memcpy(&pNode->a[pNode->n], aDoclist, nDoclist); - pNode->n += nDoclist; + /* If there is insufficient space allocated at StrBuffer.z, use realloc() + ** to grow the buffer until so that it is big enough to accomadate the + ** appended data. + */ + if( pStr->n+nAppend+1>=pStr->nAlloc ){ + int nAlloc = pStr->nAlloc+nAppend+100; + char *zNew = sqlite3_realloc(pStr->z, nAlloc); + if( !zNew ){ + return SQLITE_NOMEM; + } + pStr->z = zNew; + pStr->nAlloc = nAlloc; } + assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); - assert( pNode->n<=pNode->nAlloc ); + /* Append the data to the string buffer. */ + memcpy(&pStr->z[pStr->n], zAppend, nAppend); + pStr->n += nAppend; + pStr->z[pStr->n] = '\0'; return SQLITE_OK; } /* -** Append the current term and doclist pointed to by cursor pCsr to the -** appendable b-tree segment opened for writing by pWriter. +** The fts3BestSnippet() function often selects snippets that end with a +** query term. That is, the final term of the snippet is always a term +** that requires highlighting. For example, if 'X' is a highlighted term +** and '.' is a non-highlighted term, BestSnippet() may select: ** -** Return SQLITE_OK if successful, or an SQLite error code otherwise. +** ........X.....X +** +** This function "shifts" the beginning of the snippet forward in the +** document so that there are approximately the same number of +** non-highlighted terms to the right of the final highlighted term as there +** are to the left of the first highlighted term. For example, to this: +** +** ....X.....X.... +** +** This is done as part of extracting the snippet text, not when selecting +** the snippet. Snippet selection is done based on doclists only, so there +** is no way for fts3BestSnippet() to know whether or not the document +** actually contains terms that follow the final highlighted term. */ -static int fts3IncrmergeAppend( - Fts3Table *p, /* Fts3 table handle */ - IncrmergeWriter *pWriter, /* Writer object */ - Fts3MultiSegReader *pCsr /* Cursor containing term and doclist */ +static int fts3SnippetShift( + Fts3Table *pTab, /* FTS3 table snippet comes from */ + int iLangid, /* Language id to use in tokenizing */ + int nSnippet, /* Number of tokens desired for snippet */ + const char *zDoc, /* Document text to extract snippet from */ + int nDoc, /* Size of buffer zDoc in bytes */ + int *piPos, /* IN/OUT: First token of snippet */ + u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ ){ - const char *zTerm = pCsr->zTerm; - int nTerm = pCsr->nTerm; - const char *aDoclist = pCsr->aDoclist; - int nDoclist = pCsr->nDoclist; - int rc = SQLITE_OK; /* Return code */ - int nSpace; /* Total space in bytes required on leaf */ - int nPrefix; /* Size of prefix shared with previous term */ - int nSuffix; /* Size of suffix (nTerm - nPrefix) */ - NodeWriter *pLeaf; /* Object used to write leaf nodes */ - - pLeaf = &pWriter->aNodeWriter[0]; - nPrefix = fts3PrefixCompress(pLeaf->key.a, pLeaf->key.n, zTerm, nTerm); - nSuffix = nTerm - nPrefix; + u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ - nSpace = sqlite3Fts3VarintLen(nPrefix); - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; + if( hlmask ){ + int nLeft; /* Tokens to the left of first highlight */ + int nRight; /* Tokens to the right of last highlight */ + int nDesired; /* Ideal number of tokens to shift forward */ - /* If the current block is not empty, and if adding this term/doclist - ** to the current block would make it larger than Fts3Table.nNodeSize - ** bytes, write this block out to the database. */ - if( pLeaf->block.n>0 && (pLeaf->block.n + nSpace)>p->nNodeSize ){ - rc = fts3WriteSegment(p, pLeaf->iBlock, pLeaf->block.a, pLeaf->block.n); - pWriter->nWork++; + for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); + for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); + nDesired = (nLeft-nRight)/2; - /* Add the current term to the parent node. The term added to the - ** parent must: - ** - ** a) be greater than the largest term on the leaf node just written - ** to the database (still available in pLeaf->key), and - ** - ** b) be less than or equal to the term about to be added to the new - ** leaf node (zTerm/nTerm). - ** - ** In other words, it must be the prefix of zTerm 1 byte longer than - ** the common prefix (if any) of zTerm and pWriter->zTerm. + /* Ideally, the start of the snippet should be pushed forward in the + ** document nDesired tokens. This block checks if there are actually + ** nDesired tokens to the right of the snippet. If so, *piPos and + ** *pHlMask are updated to shift the snippet nDesired tokens to the + ** right. Otherwise, the snippet is shifted by the number of tokens + ** available. */ - if( rc==SQLITE_OK ){ - rc = fts3IncrmergePush(p, pWriter, zTerm, nPrefix+1); - } - - /* Advance to the next output block */ - pLeaf->iBlock++; - pLeaf->key.n = 0; - pLeaf->block.n = 0; + if( nDesired>0 ){ + int nShift; /* Number of tokens to shift snippet by */ + int iCurrent = 0; /* Token counter */ + int rc; /* Return Code */ + sqlite3_tokenizer_module *pMod; + sqlite3_tokenizer_cursor *pC; + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - nSuffix = nTerm; - nSpace = 1; - nSpace += sqlite3Fts3VarintLen(nSuffix) + nSuffix; - nSpace += sqlite3Fts3VarintLen(nDoclist) + nDoclist; - } + /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) + ** or more tokens in zDoc/nDoc. + */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); + if( rc!=SQLITE_OK ){ + return rc; + } + while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ + const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); + } + pMod->xClose(pC); + if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } - pWriter->nLeafData += nSpace; - blobGrowBuffer(&pLeaf->block, pLeaf->block.n + nSpace, &rc); - if( rc==SQLITE_OK ){ - if( pLeaf->block.n==0 ){ - pLeaf->block.n = 1; - pLeaf->block.a[0] = '\0'; + nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; + assert( nShift<=nDesired ); + if( nShift>0 ){ + *piPos += nShift; + *pHlmask = hlmask >> nShift; + } } - rc = fts3AppendToNode( - &pLeaf->block, &pLeaf->key, zTerm, nTerm, aDoclist, nDoclist - ); } - - return rc; + return SQLITE_OK; } /* -** This function is called to release all dynamic resources held by the -** merge-writer object pWriter, and if no error has occurred, to flush -** all outstanding node buffers held by pWriter to disk. -** -** If *pRc is not SQLITE_OK when this function is called, then no attempt -** is made to write any data to disk. Instead, this function serves only -** to release outstanding resources. -** -** Otherwise, if *pRc is initially SQLITE_OK and an error occurs while -** flushing buffers to disk, *pRc is set to an SQLite error code before -** returning. +** Extract the snippet text for fragment pFragment from cursor pCsr and +** append it to string buffer pOut. */ -static void fts3IncrmergeRelease( - Fts3Table *p, /* FTS3 table handle */ - IncrmergeWriter *pWriter, /* Merge-writer object */ - int *pRc /* IN/OUT: Error code */ +static int fts3SnippetText( + Fts3Cursor *pCsr, /* FTS3 Cursor */ + SnippetFragment *pFragment, /* Snippet to extract */ + int iFragment, /* Fragment number */ + int isLast, /* True for final fragment in snippet */ + int nSnippet, /* Number of tokens in extracted snippet */ + const char *zOpen, /* String inserted before highlighted term */ + const char *zClose, /* String inserted after highlighted term */ + const char *zEllipsis, /* String inserted between snippets */ + StrBuffer *pOut /* Write output here */ ){ - int i; /* Used to iterate through non-root layers */ - int iRoot; /* Index of root in pWriter->aNodeWriter */ - NodeWriter *pRoot; /* NodeWriter for root node */ - int rc = *pRc; /* Error code */ + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc; /* Return code */ + const char *zDoc; /* Document text to extract snippet from */ + int nDoc; /* Size of zDoc in bytes */ + int iCurrent = 0; /* Current token number of document */ + int iEnd = 0; /* Byte offset of end of current token */ + int isShiftDone = 0; /* True after snippet is shifted */ + int iPos = pFragment->iPos; /* First token of snippet */ + u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ + int iCol = pFragment->iCol+1; /* Query column to extract text from */ + sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ + + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ + return SQLITE_NOMEM; + } + return SQLITE_OK; + } + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); - /* Set iRoot to the index in pWriter->aNodeWriter[] of the output segment - ** root node. If the segment fits entirely on a single leaf node, iRoot - ** will be set to 0. If the root node is the parent of the leaves, iRoot - ** will be 1. And so on. */ - for(iRoot=FTS_MAX_APPENDABLE_HEIGHT-1; iRoot>=0; iRoot--){ - NodeWriter *pNode = &pWriter->aNodeWriter[iRoot]; - if( pNode->block.n>0 ) break; - assert( *pRc || pNode->block.nAlloc==0 ); - assert( *pRc || pNode->key.nAlloc==0 ); - sqlite3_free(pNode->block.a); - sqlite3_free(pNode->key.a); + /* Open a token cursor on the document. */ + pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); + if( rc!=SQLITE_OK ){ + return rc; } - /* Empty output segment. This is a no-op. */ - if( iRoot<0 ) return; + while( rc==SQLITE_OK ){ + const char *ZDUMMY; /* Dummy argument used with tokenizer */ + int DUMMY1 = -1; /* Dummy argument used with tokenizer */ + int iBegin = 0; /* Offset in zDoc of start of token */ + int iFin = 0; /* Offset in zDoc of end of token */ + int isHighlight = 0; /* True for highlighted terms */ - /* The entire output segment fits on a single node. Normally, this means - ** the node would be stored as a blob in the "root" column of the %_segdir - ** table. However, this is not permitted in this case. The problem is that - ** space has already been reserved in the %_segments table, and so the - ** start_block and end_block fields of the %_segdir table must be populated. - ** And, by design or by accident, released versions of FTS cannot handle - ** segments that fit entirely on the root node with start_block!=0. - ** - ** Instead, create a synthetic root node that contains nothing but a - ** pointer to the single content node. So that the segment consists of a - ** single leaf and a single interior (root) node. - ** - ** Todo: Better might be to defer allocating space in the %_segments - ** table until we are sure it is needed. - */ - if( iRoot==0 ){ - Blob *pBlock = &pWriter->aNodeWriter[1].block; - blobGrowBuffer(pBlock, 1 + FTS3_VARINT_MAX, &rc); - if( rc==SQLITE_OK ){ - pBlock->a[0] = 0x01; - pBlock->n = 1 + sqlite3Fts3PutVarint( - &pBlock->a[1], pWriter->aNodeWriter[0].iBlock + /* Variable DUMMY1 is initialized to a negative value above. Elsewhere + ** in the FTS code the variable that the third argument to xNext points to + ** is initialized to zero before the first (*but not necessarily + ** subsequent*) call to xNext(). This is done for a particular application + ** that needs to know whether or not the tokenizer is being used for + ** snippet generation or for some other purpose. + ** + ** Extreme care is required when writing code to depend on this + ** initialization. It is not a documented part of the tokenizer interface. + ** If a tokenizer is used directly by any code outside of FTS, this + ** convention might not be respected. */ + rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); + if( rc!=SQLITE_OK ){ + if( rc==SQLITE_DONE ){ + /* Special case - the last token of the snippet is also the last token + ** of the column. Append any punctuation that occurred between the end + ** of the previous token and the end of the document to the output. + ** Then break out of the loop. */ + rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + } + break; + } + if( iCurrent<iPos ){ continue; } + + if( !isShiftDone ){ + int n = nDoc - iBegin; + rc = fts3SnippetShift( + pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask ); + isShiftDone = 1; + + /* Now that the shift has been done, check if the initial "..." are + ** required. They are required if (a) this is not the first fragment, + ** or (b) this fragment does not begin at position 0 of its column. + */ + if( rc==SQLITE_OK ){ + if( iPos>0 || iFragment>0 ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + }else if( iBegin ){ + rc = fts3StringAppend(pOut, zDoc, iBegin); + } + } + if( rc!=SQLITE_OK || iCurrent<iPos ) continue; } - iRoot = 1; - } - pRoot = &pWriter->aNodeWriter[iRoot]; - /* Flush all currently outstanding nodes to disk. */ - for(i=0; i<iRoot; i++){ - NodeWriter *pNode = &pWriter->aNodeWriter[i]; - if( pNode->block.n>0 && rc==SQLITE_OK ){ - rc = fts3WriteSegment(p, pNode->iBlock, pNode->block.a, pNode->block.n); + if( iCurrent>=(iPos+nSnippet) ){ + if( isLast ){ + rc = fts3StringAppend(pOut, zEllipsis, -1); + } + break; } - sqlite3_free(pNode->block.a); - sqlite3_free(pNode->key.a); - } - /* Write the %_segdir record. */ - if( rc==SQLITE_OK ){ - rc = fts3WriteSegdir(p, - pWriter->iAbsLevel+1, /* level */ - pWriter->iIdx, /* idx */ - pWriter->iStart, /* start_block */ - pWriter->aNodeWriter[0].iBlock, /* leaves_end_block */ - pWriter->iEnd, /* end_block */ - (pWriter->bNoLeafData==0 ? pWriter->nLeafData : 0), /* end_block */ - pRoot->block.a, pRoot->block.n /* root */ - ); + /* Set isHighlight to true if this term should be highlighted. */ + isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; + + if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); + if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); + if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); + + iEnd = iFin; } - sqlite3_free(pRoot->block.a); - sqlite3_free(pRoot->key.a); - *pRc = rc; + pMod->xClose(pC); + return rc; } + /* -** Compare the term in buffer zLhs (size in bytes nLhs) with that in -** zRhs (size in bytes nRhs) using memcmp. If one term is a prefix of -** the other, it is considered to be smaller than the other. +** This function is used to count the entries in a column-list (a +** delta-encoded list of term offsets within a single column of a single +** row). When this function is called, *ppCollist should point to the +** beginning of the first varint in the column-list (the varint that +** contains the position of the first matching term in the column data). +** Before returning, *ppCollist is set to point to the first byte after +** the last varint in the column-list (either the 0x00 signifying the end +** of the position-list, or the 0x01 that precedes the column number of +** the next column in the position-list). ** -** Return -ve if zLhs is smaller than zRhs, 0 if it is equal, or +ve -** if it is greater. +** The number of elements in the column-list is returned. */ -static int fts3TermCmp( - const char *zLhs, int nLhs, /* LHS of comparison */ - const char *zRhs, int nRhs /* RHS of comparison */ -){ - int nCmp = MIN(nLhs, nRhs); - int res; +static int fts3ColumnlistCount(char **ppCollist){ + char *pEnd = *ppCollist; + char c = 0; + int nEntry = 0; - res = memcmp(zLhs, zRhs, nCmp); - if( res==0 ) res = nLhs - nRhs; + /* A column-list is terminated by either a 0x01 or 0x00. */ + while( 0xFE & (*pEnd | c) ){ + c = *pEnd++ & 0x80; + if( !c ) nEntry++; + } - return res; + *ppCollist = pEnd; + return nEntry; } - /* -** Query to see if the entry in the %_segments table with blockid iEnd is -** NULL. If no error occurs and the entry is NULL, set *pbRes 1 before -** returning. Otherwise, set *pbRes to 0. -** -** Or, if an error occurs while querying the database, return an SQLite -** error code. The final value of *pbRes is undefined in this case. -** -** This is used to test if a segment is an "appendable" segment. If it -** is, then a NULL entry has been inserted into the %_segments table -** with blockid %_segdir.end_block. +** This function gathers 'y' or 'b' data for a single phrase. */ -static int fts3IsAppendable(Fts3Table *p, sqlite3_int64 iEnd, int *pbRes){ - int bRes = 0; /* Result to set *pbRes to */ - sqlite3_stmt *pCheck = 0; /* Statement to query database with */ - int rc; /* Return code */ +static void fts3ExprLHits( + Fts3Expr *pExpr, /* Phrase expression node */ + MatchInfo *p /* Matchinfo context */ +){ + Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; + int iStart; + Fts3Phrase *pPhrase = pExpr->pPhrase; + char *pIter = pPhrase->doclist.pList; + int iCol = 0; - rc = fts3SqlStmt(p, SQL_SEGMENT_IS_APPENDABLE, &pCheck, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pCheck, 1, iEnd); - if( SQLITE_ROW==sqlite3_step(pCheck) ) bRes = 1; - rc = sqlite3_reset(pCheck); + assert( p->flag==FTS3_MATCHINFO_LHITS_BM || p->flag==FTS3_MATCHINFO_LHITS ); + if( p->flag==FTS3_MATCHINFO_LHITS ){ + iStart = pExpr->iPhrase * p->nCol; + }else{ + iStart = pExpr->iPhrase * ((p->nCol + 31) / 32); + } + + while( 1 ){ + int nHit = fts3ColumnlistCount(&pIter); + if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ + if( p->flag==FTS3_MATCHINFO_LHITS ){ + p->aMatchinfo[iStart + iCol] = (u32)nHit; + }else if( nHit ){ + p->aMatchinfo[iStart + (iCol+1)/32] |= (1 << (iCol&0x1F)); + } + } + assert( *pIter==0x00 || *pIter==0x01 ); + if( *pIter!=0x01 ) break; + pIter++; + pIter += fts3GetVarint32(pIter, &iCol); } - - *pbRes = bRes; - return rc; } /* -** This function is called when initializing an incremental-merge operation. -** It checks if the existing segment with index value iIdx at absolute level -** (iAbsLevel+1) can be appended to by the incremental merge. If it can, the -** merge-writer object *pWriter is initialized to write to it. +** Gather the results for matchinfo directives 'y' and 'b'. +*/ +static void fts3ExprLHitGather( + Fts3Expr *pExpr, + MatchInfo *p +){ + assert( (pExpr->pLeft==0)==(pExpr->pRight==0) ); + if( pExpr->bEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ + if( pExpr->pLeft ){ + fts3ExprLHitGather(pExpr->pLeft, p); + fts3ExprLHitGather(pExpr->pRight, p); + }else{ + fts3ExprLHits(pExpr, p); + } + } +} + +/* +** fts3ExprIterate() callback used to collect the "global" matchinfo stats +** for a single query. ** -** An existing segment can be appended to by an incremental merge if: +** fts3ExprIterate() callback to load the 'global' elements of a +** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements +** of the matchinfo array that are constant for all rows returned by the +** current query. ** -** * It was initially created as an appendable segment (with all required -** space pre-allocated), and +** Argument pCtx is actually a pointer to a struct of type MatchInfo. This +** function populates Matchinfo.aMatchinfo[] as follows: ** -** * The first key read from the input (arguments zKey and nKey) is -** greater than the largest key currently stored in the potential -** output segment. +** for(iCol=0; iCol<nCol; iCol++){ +** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; +** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; +** } +** +** where X is the number of matches for phrase iPhrase is column iCol of all +** rows of the table. Y is the number of rows for which column iCol contains +** at least one instance of phrase iPhrase. +** +** If the phrase pExpr consists entirely of deferred tokens, then all X and +** Y values are set to nDoc, where nDoc is the number of documents in the +** file system. This is done because the full-text index doclist is required +** to calculate these values properly, and the full-text index doclist is +** not available for deferred tokens. */ -static int fts3IncrmergeLoad( - Fts3Table *p, /* Fts3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ - int iIdx, /* Index of candidate output segment */ - const char *zKey, /* First key to write */ - int nKey, /* Number of bytes in nKey */ - IncrmergeWriter *pWriter /* Populate this object */ +static int fts3ExprGlobalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ ){ - int rc; /* Return code */ - sqlite3_stmt *pSelect = 0; /* SELECT to read %_segdir entry */ + MatchInfo *p = (MatchInfo *)pCtx; + return sqlite3Fts3EvalPhraseStats( + p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] + ); +} - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pSelect, 0); - if( rc==SQLITE_OK ){ - sqlite3_int64 iStart = 0; /* Value of %_segdir.start_block */ - sqlite3_int64 iLeafEnd = 0; /* Value of %_segdir.leaves_end_block */ - sqlite3_int64 iEnd = 0; /* Value of %_segdir.end_block */ - const char *aRoot = 0; /* Pointer to %_segdir.root buffer */ - int nRoot = 0; /* Size of aRoot[] in bytes */ - int rc2; /* Return code from sqlite3_reset() */ - int bAppendable = 0; /* Set to true if segment is appendable */ +/* +** fts3ExprIterate() callback used to collect the "local" part of the +** FTS3_MATCHINFO_HITS array. The local stats are those elements of the +** array that are different for each row returned by the query. +*/ +static int fts3ExprLocalHitsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number */ + void *pCtx /* Pointer to MatchInfo structure */ +){ + int rc = SQLITE_OK; + MatchInfo *p = (MatchInfo *)pCtx; + int iStart = iPhrase * p->nCol * 3; + int i; - /* Read the %_segdir entry for index iIdx absolute level (iAbsLevel+1) */ - sqlite3_bind_int64(pSelect, 1, iAbsLevel+1); - sqlite3_bind_int(pSelect, 2, iIdx); - if( sqlite3_step(pSelect)==SQLITE_ROW ){ - iStart = sqlite3_column_int64(pSelect, 1); - iLeafEnd = sqlite3_column_int64(pSelect, 2); - fts3ReadEndBlockField(pSelect, 3, &iEnd, &pWriter->nLeafData); - if( pWriter->nLeafData<0 ){ - pWriter->nLeafData = pWriter->nLeafData * -1; - } - pWriter->bNoLeafData = (pWriter->nLeafData==0); - nRoot = sqlite3_column_bytes(pSelect, 4); - aRoot = sqlite3_column_blob(pSelect, 4); + for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ + char *pCsr; + rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); + if( pCsr ){ + p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); }else{ - return sqlite3_reset(pSelect); + p->aMatchinfo[iStart+i*3] = 0; } + } - /* Check for the zero-length marker in the %_segments table */ - rc = fts3IsAppendable(p, iEnd, &bAppendable); + return rc; +} - /* Check that zKey/nKey is larger than the largest key the candidate */ - if( rc==SQLITE_OK && bAppendable ){ - char *aLeaf = 0; - int nLeaf = 0; +static int fts3MatchinfoCheck( + Fts3Table *pTab, + char cArg, + char **pzErr +){ + if( (cArg==FTS3_MATCHINFO_NPHRASE) + || (cArg==FTS3_MATCHINFO_NCOL) + || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) + || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) + || (cArg==FTS3_MATCHINFO_LCS) + || (cArg==FTS3_MATCHINFO_HITS) + || (cArg==FTS3_MATCHINFO_LHITS) + || (cArg==FTS3_MATCHINFO_LHITS_BM) + ){ + return SQLITE_OK; + } + sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); + return SQLITE_ERROR; +} - rc = sqlite3Fts3ReadBlock(p, iLeafEnd, &aLeaf, &nLeaf, 0); - if( rc==SQLITE_OK ){ - NodeReader reader; - for(rc = nodeReaderInit(&reader, aLeaf, nLeaf); - rc==SQLITE_OK && reader.aNode; - rc = nodeReaderNext(&reader) - ){ - assert( reader.aNode ); - } - if( fts3TermCmp(zKey, nKey, reader.term.a, reader.term.n)<=0 ){ - bAppendable = 0; - } - nodeReaderRelease(&reader); - } - sqlite3_free(aLeaf); - } +static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ + int nVal; /* Number of integers output by cArg */ - if( rc==SQLITE_OK && bAppendable ){ - /* It is possible to append to this segment. Set up the IncrmergeWriter - ** object to do so. */ - int i; - int nHeight = (int)aRoot[0]; - NodeWriter *pNode; + switch( cArg ){ + case FTS3_MATCHINFO_NDOC: + case FTS3_MATCHINFO_NPHRASE: + case FTS3_MATCHINFO_NCOL: + nVal = 1; + break; - pWriter->nLeafEst = (int)((iEnd - iStart) + 1)/FTS_MAX_APPENDABLE_HEIGHT; - pWriter->iStart = iStart; - pWriter->iEnd = iEnd; - pWriter->iAbsLevel = iAbsLevel; - pWriter->iIdx = iIdx; + case FTS3_MATCHINFO_AVGLENGTH: + case FTS3_MATCHINFO_LENGTH: + case FTS3_MATCHINFO_LCS: + nVal = pInfo->nCol; + break; - for(i=nHeight+1; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ - pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; - } + case FTS3_MATCHINFO_LHITS: + nVal = pInfo->nCol * pInfo->nPhrase; + break; - pNode = &pWriter->aNodeWriter[nHeight]; - pNode->iBlock = pWriter->iStart + pWriter->nLeafEst*nHeight; - blobGrowBuffer(&pNode->block, MAX(nRoot, p->nNodeSize), &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->block.a, aRoot, nRoot); - pNode->block.n = nRoot; - } + case FTS3_MATCHINFO_LHITS_BM: + nVal = pInfo->nPhrase * ((pInfo->nCol + 31) / 32); + break; - for(i=nHeight; i>=0 && rc==SQLITE_OK; i--){ - NodeReader reader; - pNode = &pWriter->aNodeWriter[i]; + default: + assert( cArg==FTS3_MATCHINFO_HITS ); + nVal = pInfo->nCol * pInfo->nPhrase * 3; + break; + } - rc = nodeReaderInit(&reader, pNode->block.a, pNode->block.n); - while( reader.aNode && rc==SQLITE_OK ) rc = nodeReaderNext(&reader); - blobGrowBuffer(&pNode->key, reader.term.n, &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->key.a, reader.term.a, reader.term.n); - pNode->key.n = reader.term.n; - if( i>0 ){ - char *aBlock = 0; - int nBlock = 0; - pNode = &pWriter->aNodeWriter[i-1]; - pNode->iBlock = reader.iChild; - rc = sqlite3Fts3ReadBlock(p, reader.iChild, &aBlock, &nBlock, 0); - blobGrowBuffer(&pNode->block, MAX(nBlock, p->nNodeSize), &rc); - if( rc==SQLITE_OK ){ - memcpy(pNode->block.a, aBlock, nBlock); - pNode->block.n = nBlock; - } - sqlite3_free(aBlock); - } - } - nodeReaderRelease(&reader); - } - } + return nVal; +} - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; +static int fts3MatchinfoSelectDoctotal( + Fts3Table *pTab, + sqlite3_stmt **ppStmt, + sqlite3_int64 *pnDoc, + const char **paLen +){ + sqlite3_stmt *pStmt; + const char *a; + sqlite3_int64 nDoc; + + if( !*ppStmt ){ + int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); + if( rc!=SQLITE_OK ) return rc; } + pStmt = *ppStmt; + assert( sqlite3_data_count(pStmt)==1 ); - return rc; + a = sqlite3_column_blob(pStmt, 0); + a += sqlite3Fts3GetVarint(a, &nDoc); + if( nDoc==0 ) return FTS_CORRUPT_VTAB; + *pnDoc = (u32)nDoc; + + if( paLen ) *paLen = a; + return SQLITE_OK; } /* -** Determine the largest segment index value that exists within absolute -** level iAbsLevel+1. If no error occurs, set *piIdx to this value plus -** one before returning SQLITE_OK. Or, if there are no segments at all -** within level iAbsLevel, set *piIdx to zero. -** -** If an error occurs, return an SQLite error code. The final value of -** *piIdx is undefined in this case. +** An instance of the following structure is used to store state while +** iterating through a multi-column position-list corresponding to the +** hits for a single phrase on a single row in order to calculate the +** values for a matchinfo() FTS3_MATCHINFO_LCS request. */ -static int fts3IncrmergeOutputIdx( - Fts3Table *p, /* FTS Table handle */ - sqlite3_int64 iAbsLevel, /* Absolute index of input segments */ - int *piIdx /* OUT: Next free index at iAbsLevel+1 */ +typedef struct LcsIterator LcsIterator; +struct LcsIterator { + Fts3Expr *pExpr; /* Pointer to phrase expression */ + int iPosOffset; /* Tokens count up to end of this phrase */ + char *pRead; /* Cursor used to iterate through aDoclist */ + int iPos; /* Current position */ +}; + +/* +** If LcsIterator.iCol is set to the following value, the iterator has +** finished iterating through all offsets for all columns. +*/ +#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; + +static int fts3MatchinfoLcsCb( + Fts3Expr *pExpr, /* Phrase expression node */ + int iPhrase, /* Phrase number (numbered from zero) */ + void *pCtx /* Pointer to MatchInfo structure */ ){ - int rc; - sqlite3_stmt *pOutputIdx = 0; /* SQL used to find output index */ + LcsIterator *aIter = (LcsIterator *)pCtx; + aIter[iPhrase].pExpr = pExpr; + return SQLITE_OK; +} - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENT_INDEX, &pOutputIdx, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pOutputIdx, 1, iAbsLevel+1); - sqlite3_step(pOutputIdx); - *piIdx = sqlite3_column_int(pOutputIdx, 0); - rc = sqlite3_reset(pOutputIdx); +/* +** Advance the iterator passed as an argument to the next position. Return +** 1 if the iterator is at EOF or if it now points to the start of the +** position list for the next column. +*/ +static int fts3LcsIteratorAdvance(LcsIterator *pIter){ + char *pRead = pIter->pRead; + sqlite3_int64 iRead; + int rc = 0; + + pRead += sqlite3Fts3GetVarint(pRead, &iRead); + if( iRead==0 || iRead==1 ){ + pRead = 0; + rc = 1; + }else{ + pIter->iPos += (int)(iRead-2); } + pIter->pRead = pRead; return rc; } - -/* -** Allocate an appendable output segment on absolute level iAbsLevel+1 -** with idx value iIdx. -** -** In the %_segdir table, a segment is defined by the values in three -** columns: -** -** start_block -** leaves_end_block -** end_block -** -** When an appendable segment is allocated, it is estimated that the -** maximum number of leaf blocks that may be required is the sum of the -** number of leaf blocks consumed by the input segments, plus the number -** of input segments, multiplied by two. This value is stored in stack -** variable nLeafEst. + +/* +** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. ** -** A total of 16*nLeafEst blocks are allocated when an appendable segment -** is created ((1 + end_block - start_block)==16*nLeafEst). The contiguous -** array of leaf nodes starts at the first block allocated. The array -** of interior nodes that are parents of the leaf nodes start at block -** (start_block + (1 + end_block - start_block) / 16). And so on. +** If the call is successful, the longest-common-substring lengths for each +** column are written into the first nCol elements of the pInfo->aMatchinfo[] +** array before returning. SQLITE_OK is returned in this case. ** -** In the actual code below, the value "16" is replaced with the -** pre-processor macro FTS_MAX_APPENDABLE_HEIGHT. +** Otherwise, if an error occurs, an SQLite error code is returned and the +** data written to the first nCol elements of pInfo->aMatchinfo[] is +** undefined. */ -static int fts3IncrmergeWriter( - Fts3Table *p, /* Fts3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level of input segments */ - int iIdx, /* Index of new output segment */ - Fts3MultiSegReader *pCsr, /* Cursor that data will be read from */ - IncrmergeWriter *pWriter /* Populate this object */ -){ - int rc; /* Return Code */ - int i; /* Iterator variable */ - int nLeafEst = 0; /* Blocks allocated for leaf nodes */ - sqlite3_stmt *pLeafEst = 0; /* SQL used to determine nLeafEst */ - sqlite3_stmt *pFirstBlock = 0; /* SQL used to determine first block */ +static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ + LcsIterator *aIter; + int i; + int iCol; + int nToken = 0; - /* Calculate nLeafEst. */ - rc = fts3SqlStmt(p, SQL_MAX_LEAF_NODE_ESTIMATE, &pLeafEst, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pLeafEst, 1, iAbsLevel); - sqlite3_bind_int64(pLeafEst, 2, pCsr->nSegment); - if( SQLITE_ROW==sqlite3_step(pLeafEst) ){ - nLeafEst = sqlite3_column_int(pLeafEst, 0); - } - rc = sqlite3_reset(pLeafEst); + /* Allocate and populate the array of LcsIterator objects. The array + ** contains one element for each matchable phrase in the query. + **/ + aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); + if( !aIter ) return SQLITE_NOMEM; + memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); + (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); + + for(i=0; i<pInfo->nPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + nToken -= pIter->pExpr->pPhrase->nToken; + pIter->iPosOffset = nToken; } - if( rc!=SQLITE_OK ) return rc; - /* Calculate the first block to use in the output segment */ - rc = fts3SqlStmt(p, SQL_NEXT_SEGMENTS_ID, &pFirstBlock, 0); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pFirstBlock) ){ - pWriter->iStart = sqlite3_column_int64(pFirstBlock, 0); - pWriter->iEnd = pWriter->iStart - 1; - pWriter->iEnd += nLeafEst * FTS_MAX_APPENDABLE_HEIGHT; + for(iCol=0; iCol<pInfo->nCol; iCol++){ + int nLcs = 0; /* LCS value for this column */ + int nLive = 0; /* Number of iterators in aIter not at EOF */ + + for(i=0; i<pInfo->nPhrase; i++){ + int rc; + LcsIterator *pIt = &aIter[i]; + rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); + if( rc!=SQLITE_OK ) return rc; + if( pIt->pRead ){ + pIt->iPos = pIt->iPosOffset; + fts3LcsIteratorAdvance(&aIter[i]); + nLive++; + } } - rc = sqlite3_reset(pFirstBlock); - } - if( rc!=SQLITE_OK ) return rc; - /* Insert the marker in the %_segments table to make sure nobody tries - ** to steal the space just allocated. This is also used to identify - ** appendable segments. */ - rc = fts3WriteSegment(p, pWriter->iEnd, 0, 0); - if( rc!=SQLITE_OK ) return rc; + while( nLive>0 ){ + LcsIterator *pAdv = 0; /* The iterator to advance by one position */ + int nThisLcs = 0; /* LCS for the current iterator positions */ - pWriter->iAbsLevel = iAbsLevel; - pWriter->nLeafEst = nLeafEst; - pWriter->iIdx = iIdx; + for(i=0; i<pInfo->nPhrase; i++){ + LcsIterator *pIter = &aIter[i]; + if( pIter->pRead==0 ){ + /* This iterator is already at EOF for this column. */ + nThisLcs = 0; + }else{ + if( pAdv==0 || pIter->iPos<pAdv->iPos ){ + pAdv = pIter; + } + if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ + nThisLcs++; + }else{ + nThisLcs = 1; + } + if( nThisLcs>nLcs ) nLcs = nThisLcs; + } + } + if( fts3LcsIteratorAdvance(pAdv) ) nLive--; + } - /* Set up the array of NodeWriter objects */ - for(i=0; i<FTS_MAX_APPENDABLE_HEIGHT; i++){ - pWriter->aNodeWriter[i].iBlock = pWriter->iStart + i*pWriter->nLeafEst; + pInfo->aMatchinfo[iCol] = nLcs; } + + sqlite3_free(aIter); return SQLITE_OK; } /* -** Remove an entry from the %_segdir table. This involves running the -** following two statements: +** Populate the buffer pInfo->aMatchinfo[] with an array of integers to +** be returned by the matchinfo() function. Argument zArg contains the +** format string passed as the second argument to matchinfo (or the +** default value "pcx" if no second argument was specified). The format +** string has already been validated and the pInfo->aMatchinfo[] array +** is guaranteed to be large enough for the output. ** -** DELETE FROM %_segdir WHERE level = :iAbsLevel AND idx = :iIdx -** UPDATE %_segdir SET idx = idx - 1 WHERE level = :iAbsLevel AND idx > :iIdx +** If bGlobal is true, then populate all fields of the matchinfo() output. +** If it is false, then assume that those fields that do not change between +** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) +** have already been populated. ** -** The DELETE statement removes the specific %_segdir level. The UPDATE -** statement ensures that the remaining segments have contiguously allocated -** idx values. +** Return SQLITE_OK if successful, or an SQLite error code if an error +** occurs. If a value other than SQLITE_OK is returned, the state the +** pInfo->aMatchinfo[] buffer is left in is undefined. */ -static int fts3RemoveSegdirEntry( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level to delete from */ - int iIdx /* Index of %_segdir entry to delete */ +static int fts3MatchinfoValues( + Fts3Cursor *pCsr, /* FTS3 cursor object */ + int bGlobal, /* True to grab the global stats */ + MatchInfo *pInfo, /* Matchinfo context object */ + const char *zArg /* Matchinfo format string */ ){ - int rc; /* Return code */ - sqlite3_stmt *pDelete = 0; /* DELETE statement */ + int rc = SQLITE_OK; + int i; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_stmt *pSelect = 0; - rc = fts3SqlStmt(p, SQL_DELETE_SEGDIR_ENTRY, &pDelete, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDelete, 1, iAbsLevel); - sqlite3_bind_int(pDelete, 2, iIdx); - sqlite3_step(pDelete); - rc = sqlite3_reset(pDelete); - } + for(i=0; rc==SQLITE_OK && zArg[i]; i++){ + pInfo->flag = zArg[i]; + switch( zArg[i] ){ + case FTS3_MATCHINFO_NPHRASE: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; + break; - return rc; -} + case FTS3_MATCHINFO_NCOL: + if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; + break; + + case FTS3_MATCHINFO_NDOC: + if( bGlobal ){ + sqlite3_int64 nDoc = 0; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); + pInfo->aMatchinfo[0] = (u32)nDoc; + } + break; -/* -** One or more segments have just been removed from absolute level iAbsLevel. -** Update the 'idx' values of the remaining segments in the level so that -** the idx values are a contiguous sequence starting from 0. -*/ -static int fts3RepackSegdirLevel( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel /* Absolute level to repack */ -){ - int rc; /* Return code */ - int *aIdx = 0; /* Array of remaining idx values */ - int nIdx = 0; /* Valid entries in aIdx[] */ - int nAlloc = 0; /* Allocated size of aIdx[] */ - int i; /* Iterator variable */ - sqlite3_stmt *pSelect = 0; /* Select statement to read idx values */ - sqlite3_stmt *pUpdate = 0; /* Update statement to modify idx values */ + case FTS3_MATCHINFO_AVGLENGTH: + if( bGlobal ){ + sqlite3_int64 nDoc; /* Number of rows in table */ + const char *a; /* Aggregate column length array */ - rc = fts3SqlStmt(p, SQL_SELECT_INDEXES, &pSelect, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int64(pSelect, 1, iAbsLevel); - while( SQLITE_ROW==sqlite3_step(pSelect) ){ - if( nIdx>=nAlloc ){ - int *aNew; - nAlloc += 16; - aNew = sqlite3_realloc(aIdx, nAlloc*sizeof(int)); - if( !aNew ){ - rc = SQLITE_NOMEM; - break; + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); + if( rc==SQLITE_OK ){ + int iCol; + for(iCol=0; iCol<pInfo->nCol; iCol++){ + u32 iVal; + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); + pInfo->aMatchinfo[iCol] = iVal; + } + } } - aIdx = aNew; + break; + + case FTS3_MATCHINFO_LENGTH: { + sqlite3_stmt *pSelectDocsize = 0; + rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); + if( rc==SQLITE_OK ){ + int iCol; + const char *a = sqlite3_column_blob(pSelectDocsize, 0); + for(iCol=0; iCol<pInfo->nCol; iCol++){ + sqlite3_int64 nToken; + a += sqlite3Fts3GetVarint(a, &nToken); + pInfo->aMatchinfo[iCol] = (u32)nToken; + } + } + sqlite3_reset(pSelectDocsize); + break; } - aIdx[nIdx++] = sqlite3_column_int(pSelect, 0); - } - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; - } - if( rc==SQLITE_OK ){ - rc = fts3SqlStmt(p, SQL_SHIFT_SEGDIR_ENTRY, &pUpdate, 0); - } - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pUpdate, 2, iAbsLevel); - } + case FTS3_MATCHINFO_LCS: + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc==SQLITE_OK ){ + rc = fts3MatchinfoLcs(pCsr, pInfo); + } + break; - assert( p->bIgnoreSavepoint==0 ); - p->bIgnoreSavepoint = 1; - for(i=0; rc==SQLITE_OK && i<nIdx; i++){ - if( aIdx[i]!=i ){ - sqlite3_bind_int(pUpdate, 3, aIdx[i]); - sqlite3_bind_int(pUpdate, 1, i); - sqlite3_step(pUpdate); - rc = sqlite3_reset(pUpdate); + case FTS3_MATCHINFO_LHITS_BM: + case FTS3_MATCHINFO_LHITS: { + int nZero = fts3MatchinfoSize(pInfo, zArg[i]) * sizeof(u32); + memset(pInfo->aMatchinfo, 0, nZero); + fts3ExprLHitGather(pCsr->pExpr, pInfo); + break; + } + + default: { + Fts3Expr *pExpr; + assert( zArg[i]==FTS3_MATCHINFO_HITS ); + pExpr = pCsr->pExpr; + rc = fts3ExprLoadDoclists(pCsr, 0, 0); + if( rc!=SQLITE_OK ) break; + if( bGlobal ){ + if( pCsr->pDeferred ){ + rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); + if( rc!=SQLITE_OK ) break; + } + rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); + sqlite3Fts3EvalTestDeferred(pCsr, &rc); + if( rc!=SQLITE_OK ) break; + } + (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); + break; + } } + + pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); } - p->bIgnoreSavepoint = 0; - sqlite3_free(aIdx); + sqlite3_reset(pSelect); return rc; } -static void fts3StartNode(Blob *pNode, int iHeight, sqlite3_int64 iChild){ - pNode->a[0] = (char)iHeight; - if( iChild ){ - assert( pNode->nAlloc>=1+sqlite3Fts3VarintLen(iChild) ); - pNode->n = 1 + sqlite3Fts3PutVarint(&pNode->a[1], iChild); - }else{ - assert( pNode->nAlloc>=1 ); - pNode->n = 1; - } -} /* -** The first two arguments are a pointer to and the size of a segment b-tree -** node. The node may be a leaf or an internal node. -** -** This function creates a new node image in blob object *pNew by copying -** all terms that are greater than or equal to zTerm/nTerm (for leaf nodes) -** or greater than zTerm/nTerm (for internal nodes) from aNode/nNode. +** Populate pCsr->aMatchinfo[] with data for the current row. The +** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). */ -static int fts3TruncateNode( - const char *aNode, /* Current node image */ - int nNode, /* Size of aNode in bytes */ - Blob *pNew, /* OUT: Write new node image here */ - const char *zTerm, /* Omit all terms smaller than this */ - int nTerm, /* Size of zTerm in bytes */ - sqlite3_int64 *piBlock /* OUT: Block number in next layer down */ +static void fts3GetMatchinfo( + sqlite3_context *pCtx, /* Return results here */ + Fts3Cursor *pCsr, /* FTS3 Cursor object */ + const char *zArg /* Second argument to matchinfo() function */ ){ - NodeReader reader; /* Reader object */ - Blob prev = {0, 0, 0}; /* Previous term written to new node */ - int rc = SQLITE_OK; /* Return code */ - int bLeaf = aNode[0]=='\0'; /* True for a leaf node */ + MatchInfo sInfo; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + int rc = SQLITE_OK; + int bGlobal = 0; /* Collect 'global' stats as well as local */ - /* Allocate required output space */ - blobGrowBuffer(pNew, nNode, &rc); - if( rc!=SQLITE_OK ) return rc; - pNew->n = 0; + u32 *aOut = 0; + void (*xDestroyOut)(void*) = 0; - /* Populate new node buffer */ - for(rc = nodeReaderInit(&reader, aNode, nNode); - rc==SQLITE_OK && reader.aNode; - rc = nodeReaderNext(&reader) - ){ - if( pNew->n==0 ){ - int res = fts3TermCmp(reader.term.a, reader.term.n, zTerm, nTerm); - if( res<0 || (bLeaf==0 && res==0) ) continue; - fts3StartNode(pNew, (int)aNode[0], reader.iChild); - *piBlock = reader.iChild; - } - rc = fts3AppendToNode( - pNew, &prev, reader.term.a, reader.term.n, - reader.aDoclist, reader.nDoclist - ); - if( rc!=SQLITE_OK ) break; - } - if( pNew->n==0 ){ - fts3StartNode(pNew, (int)aNode[0], reader.iChild); - *piBlock = reader.iChild; + memset(&sInfo, 0, sizeof(MatchInfo)); + sInfo.pCursor = pCsr; + sInfo.nCol = pTab->nColumn; + + /* If there is cached matchinfo() data, but the format string for the + ** cache does not match the format string for this request, discard + ** the cached data. */ + if( pCsr->pMIBuffer && strcmp(pCsr->pMIBuffer->zMatchinfo, zArg) ){ + sqlite3Fts3MIBufferFree(pCsr->pMIBuffer); + pCsr->pMIBuffer = 0; } - assert( pNew->n<=pNew->nAlloc ); - nodeReaderRelease(&reader); - sqlite3_free(prev.a); - return rc; -} + /* If Fts3Cursor.pMIBuffer is NULL, then this is the first time the + ** matchinfo function has been called for this query. In this case + ** allocate the array used to accumulate the matchinfo data and + ** initialize those elements that are constant for every row. + */ + if( pCsr->pMIBuffer==0 ){ + int nMatchinfo = 0; /* Number of u32 elements in match-info */ + int i; /* Used to iterate through zArg */ -/* -** Remove all terms smaller than zTerm/nTerm from segment iIdx in absolute -** level iAbsLevel. This may involve deleting entries from the %_segments -** table, and modifying existing entries in both the %_segments and %_segdir -** tables. -** -** SQLITE_OK is returned if the segment is updated successfully. Or an -** SQLite error code otherwise. -*/ -static int fts3TruncateSegment( - Fts3Table *p, /* FTS3 table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level of segment to modify */ - int iIdx, /* Index within level of segment to modify */ - const char *zTerm, /* Remove terms smaller than this */ - int nTerm /* Number of bytes in buffer zTerm */ -){ - int rc = SQLITE_OK; /* Return code */ - Blob root = {0,0,0}; /* New root page image */ - Blob block = {0,0,0}; /* Buffer used for any other block */ - sqlite3_int64 iBlock = 0; /* Block id */ - sqlite3_int64 iNewStart = 0; /* New value for iStartBlock */ - sqlite3_int64 iOldStart = 0; /* Old value for iStartBlock */ - sqlite3_stmt *pFetch = 0; /* Statement used to fetch segdir */ + /* Determine the number of phrases in the query */ + pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); + sInfo.nPhrase = pCsr->nPhrase; - rc = fts3SqlStmt(p, SQL_SELECT_SEGDIR, &pFetch, 0); - if( rc==SQLITE_OK ){ - int rc2; /* sqlite3_reset() return code */ - sqlite3_bind_int64(pFetch, 1, iAbsLevel); - sqlite3_bind_int(pFetch, 2, iIdx); - if( SQLITE_ROW==sqlite3_step(pFetch) ){ - const char *aRoot = sqlite3_column_blob(pFetch, 4); - int nRoot = sqlite3_column_bytes(pFetch, 4); - iOldStart = sqlite3_column_int64(pFetch, 1); - rc = fts3TruncateNode(aRoot, nRoot, &root, zTerm, nTerm, &iBlock); + /* Determine the number of integers in the buffer returned by this call. */ + for(i=0; zArg[i]; i++){ + char *zErr = 0; + if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ + sqlite3_result_error(pCtx, zErr, -1); + sqlite3_free(zErr); + return; + } + nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); } - rc2 = sqlite3_reset(pFetch); - if( rc==SQLITE_OK ) rc = rc2; - } - while( rc==SQLITE_OK && iBlock ){ - char *aBlock = 0; - int nBlock = 0; - iNewStart = iBlock; + /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ + pCsr->pMIBuffer = fts3MIBufferNew(nMatchinfo, zArg); + if( !pCsr->pMIBuffer ) rc = SQLITE_NOMEM; - rc = sqlite3Fts3ReadBlock(p, iBlock, &aBlock, &nBlock, 0); - if( rc==SQLITE_OK ){ - rc = fts3TruncateNode(aBlock, nBlock, &block, zTerm, nTerm, &iBlock); - } - if( rc==SQLITE_OK ){ - rc = fts3WriteSegment(p, iNewStart, block.a, block.n); - } - sqlite3_free(aBlock); + pCsr->isMatchinfoNeeded = 1; + bGlobal = 1; } - /* Variable iNewStart now contains the first valid leaf node. */ - if( rc==SQLITE_OK && iNewStart ){ - sqlite3_stmt *pDel = 0; - rc = fts3SqlStmt(p, SQL_DELETE_SEGMENTS_RANGE, &pDel, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pDel, 1, iOldStart); - sqlite3_bind_int64(pDel, 2, iNewStart-1); - sqlite3_step(pDel); - rc = sqlite3_reset(pDel); + if( rc==SQLITE_OK ){ + xDestroyOut = fts3MIBufferAlloc(pCsr->pMIBuffer, &aOut); + if( xDestroyOut==0 ){ + rc = SQLITE_NOMEM; } } if( rc==SQLITE_OK ){ - sqlite3_stmt *pChomp = 0; - rc = fts3SqlStmt(p, SQL_CHOMP_SEGDIR, &pChomp, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pChomp, 1, iNewStart); - sqlite3_bind_blob(pChomp, 2, root.a, root.n, SQLITE_STATIC); - sqlite3_bind_int64(pChomp, 3, iAbsLevel); - sqlite3_bind_int(pChomp, 4, iIdx); - sqlite3_step(pChomp); - rc = sqlite3_reset(pChomp); + sInfo.aMatchinfo = aOut; + sInfo.nPhrase = pCsr->nPhrase; + rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); + if( bGlobal ){ + fts3MIBufferSetGlobal(pCsr->pMIBuffer); } } - sqlite3_free(root.a); - sqlite3_free(block.a); - return rc; + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + if( xDestroyOut ) xDestroyOut(aOut); + }else{ + int n = pCsr->pMIBuffer->nElem * sizeof(u32); + sqlite3_result_blob(pCtx, aOut, n, xDestroyOut); + } } /* -** This function is called after an incrmental-merge operation has run to -** merge (or partially merge) two or more segments from absolute level -** iAbsLevel. -** -** Each input segment is either removed from the db completely (if all of -** its data was copied to the output segment by the incrmerge operation) -** or modified in place so that it no longer contains those entries that -** have been duplicated in the output segment. +** Implementation of snippet() function. */ -static int fts3IncrmergeChomp( - Fts3Table *p, /* FTS table handle */ - sqlite3_int64 iAbsLevel, /* Absolute level containing segments */ - Fts3MultiSegReader *pCsr, /* Chomp all segments opened by this cursor */ - int *pnRem /* Number of segments not deleted */ +SQLITE_PRIVATE void sqlite3Fts3Snippet( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr, /* Cursor object */ + const char *zStart, /* Snippet start text - "<b>" */ + const char *zEnd, /* Snippet end text - "</b>" */ + const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ + int iCol, /* Extract snippet from this column */ + int nToken /* Approximate number of tokens in snippet */ ){ - int i; - int nRem = 0; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; int rc = SQLITE_OK; + int i; + StrBuffer res = {0, 0, 0}; - for(i=pCsr->nSegment-1; i>=0 && rc==SQLITE_OK; i--){ - Fts3SegReader *pSeg = 0; - int j; + /* The returned text includes up to four fragments of text extracted from + ** the data in the current row. The first iteration of the for(...) loop + ** below attempts to locate a single fragment of text nToken tokens in + ** size that contains at least one instance of all phrases in the query + ** expression that appear in the current row. If such a fragment of text + ** cannot be found, the second iteration of the loop attempts to locate + ** a pair of fragments, and so on. + */ + int nSnippet = 0; /* Number of fragments in this snippet */ + SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ + int nFToken = -1; /* Number of tokens in each fragment */ - /* Find the Fts3SegReader object with Fts3SegReader.iIdx==i. It is hiding - ** somewhere in the pCsr->apSegment[] array. */ - for(j=0; ALWAYS(j<pCsr->nSegment); j++){ - pSeg = pCsr->apSegment[j]; - if( pSeg->iIdx==i ) break; + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + + for(nSnippet=1; 1; nSnippet++){ + + int iSnip; /* Loop counter 0..nSnippet-1 */ + u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ + u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ + + if( nToken>=0 ){ + nFToken = (nToken+nSnippet-1) / nSnippet; + }else{ + nFToken = -1 * nToken; } - assert( j<pCsr->nSegment && pSeg->iIdx==i ); - if( pSeg->aNode==0 ){ - /* Seg-reader is at EOF. Remove the entire input segment. */ - rc = fts3DeleteSegment(p, pSeg); - if( rc==SQLITE_OK ){ - rc = fts3RemoveSegdirEntry(p, iAbsLevel, pSeg->iIdx); + for(iSnip=0; iSnip<nSnippet; iSnip++){ + int iBestScore = -1; /* Best score of columns checked so far */ + int iRead; /* Used to iterate through columns */ + SnippetFragment *pFragment = &aSnippet[iSnip]; + + memset(pFragment, 0, sizeof(*pFragment)); + + /* Loop through all columns of the table being considered for snippets. + ** If the iCol argument to this function was negative, this means all + ** columns of the FTS3 table. Otherwise, only column iCol is considered. + */ + for(iRead=0; iRead<pTab->nColumn; iRead++){ + SnippetFragment sF = {0, 0, 0, 0}; + int iS = 0; + if( iCol>=0 && iRead!=iCol ) continue; + + /* Find the best snippet of nFToken tokens in column iRead. */ + rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); + if( rc!=SQLITE_OK ){ + goto snippet_out; + } + if( iS>iBestScore ){ + *pFragment = sF; + iBestScore = iS; + } } - *pnRem = 0; - }else{ - /* The incremental merge did not copy all the data from this - ** segment to the upper level. The segment is modified in place - ** so that it contains no keys smaller than zTerm/nTerm. */ - const char *zTerm = pSeg->zTerm; - int nTerm = pSeg->nTerm; - rc = fts3TruncateSegment(p, iAbsLevel, pSeg->iIdx, zTerm, nTerm); - nRem++; + + mCovered |= pFragment->covered; } + + /* If all query phrases seen by fts3BestSnippet() are present in at least + ** one of the nSnippet snippet fragments, break out of the loop. + */ + assert( (mCovered&mSeen)==mCovered ); + if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; } - if( rc==SQLITE_OK && nRem!=pCsr->nSegment ){ - rc = fts3RepackSegdirLevel(p, iAbsLevel); + assert( nFToken>0 ); + + for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ + rc = fts3SnippetText(pCsr, &aSnippet[i], + i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res + ); } - *pnRem = nRem; - return rc; + snippet_out: + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); + } } -/* -** Store an incr-merge hint in the database. -*/ -static int fts3IncrmergeHintStore(Fts3Table *p, Blob *pHint){ - sqlite3_stmt *pReplace = 0; - int rc; /* Return code */ - rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pReplace, 0); - if( rc==SQLITE_OK ){ - sqlite3_bind_int(pReplace, 1, FTS_STAT_INCRMERGEHINT); - sqlite3_bind_blob(pReplace, 2, pHint->a, pHint->n, SQLITE_STATIC); - sqlite3_step(pReplace); - rc = sqlite3_reset(pReplace); - } +typedef struct TermOffset TermOffset; +typedef struct TermOffsetCtx TermOffsetCtx; - return rc; -} +struct TermOffset { + char *pList; /* Position-list */ + int iPos; /* Position just read from pList */ + int iOff; /* Offset of this term from read positions */ +}; + +struct TermOffsetCtx { + Fts3Cursor *pCsr; + int iCol; /* Column of table to populate aTerm for */ + int iTerm; + sqlite3_int64 iDocid; + TermOffset *aTerm; +}; /* -** Load an incr-merge hint from the database. The incr-merge hint, if one -** exists, is stored in the rowid==1 row of the %_stat table. -** -** If successful, populate blob *pHint with the value read from the %_stat -** table and return SQLITE_OK. Otherwise, if an error occurs, return an -** SQLite error code. +** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). */ -static int fts3IncrmergeHintLoad(Fts3Table *p, Blob *pHint){ - sqlite3_stmt *pSelect = 0; +static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ + TermOffsetCtx *p = (TermOffsetCtx *)ctx; + int nTerm; /* Number of tokens in phrase */ + int iTerm; /* For looping through nTerm phrase terms */ + char *pList; /* Pointer to position list for phrase */ + int iPos = 0; /* First position in position-list */ int rc; - pHint->n = 0; - rc = fts3SqlStmt(p, SQL_SELECT_STAT, &pSelect, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pSelect, 1, FTS_STAT_INCRMERGEHINT); - if( SQLITE_ROW==sqlite3_step(pSelect) ){ - const char *aHint = sqlite3_column_blob(pSelect, 0); - int nHint = sqlite3_column_bytes(pSelect, 0); - if( aHint ){ - blobGrowBuffer(pHint, nHint, &rc); - if( rc==SQLITE_OK ){ - memcpy(pHint->a, aHint, nHint); - pHint->n = nHint; - } - } - } - rc2 = sqlite3_reset(pSelect); - if( rc==SQLITE_OK ) rc = rc2; + UNUSED_PARAMETER(iPhrase); + rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); + nTerm = pExpr->pPhrase->nToken; + if( pList ){ + fts3GetDeltaPosition(&pList, &iPos); + assert( iPos>=0 ); + } + + for(iTerm=0; iTerm<nTerm; iTerm++){ + TermOffset *pT = &p->aTerm[p->iTerm++]; + pT->iOff = nTerm-iTerm-1; + pT->pList = pList; + pT->iPos = iPos; } return rc; } /* -** If *pRc is not SQLITE_OK when this function is called, it is a no-op. -** Otherwise, append an entry to the hint stored in blob *pHint. Each entry -** consists of two varints, the absolute level number of the input segments -** and the number of input segments. -** -** If successful, leave *pRc set to SQLITE_OK and return. If an error occurs, -** set *pRc to an SQLite error code before returning. +** Implementation of offsets() function. */ -static void fts3IncrmergeHintPush( - Blob *pHint, /* Hint blob to append to */ - i64 iAbsLevel, /* First varint to store in hint */ - int nInput, /* Second varint to store in hint */ - int *pRc /* IN/OUT: Error code */ +SQLITE_PRIVATE void sqlite3Fts3Offsets( + sqlite3_context *pCtx, /* SQLite function call context */ + Fts3Cursor *pCsr /* Cursor object */ ){ - blobGrowBuffer(pHint, pHint->n + 2*FTS3_VARINT_MAX, pRc); - if( *pRc==SQLITE_OK ){ - pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], iAbsLevel); - pHint->n += sqlite3Fts3PutVarint(&pHint->a[pHint->n], (i64)nInput); - } -} - -/* -** Read the last entry (most recently pushed) from the hint blob *pHint -** and then remove the entry. Write the two values read to *piAbsLevel and -** *pnInput before returning. -** -** If no error occurs, return SQLITE_OK. If the hint blob in *pHint does -** not contain at least two valid varints, return SQLITE_CORRUPT_VTAB. -*/ -static int fts3IncrmergeHintPop(Blob *pHint, i64 *piAbsLevel, int *pnInput){ - const int nHint = pHint->n; - int i; - - i = pHint->n-2; - while( i>0 && (pHint->a[i-1] & 0x80) ) i--; - while( i>0 && (pHint->a[i-1] & 0x80) ) i--; - - pHint->n = i; - i += sqlite3Fts3GetVarint(&pHint->a[i], piAbsLevel); - i += fts3GetVarint32(&pHint->a[i], pnInput); - if( i!=nHint ) return FTS_CORRUPT_VTAB; + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; + int rc; /* Return Code */ + int nToken; /* Number of tokens in query */ + int iCol; /* Column currently being processed */ + StrBuffer res = {0, 0, 0}; /* Result string */ + TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ - return SQLITE_OK; -} + if( !pCsr->pExpr ){ + sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); + return; + } + memset(&sCtx, 0, sizeof(sCtx)); + assert( pCsr->isRequireSeek==0 ); -/* -** Attempt an incremental merge that writes nMerge leaf blocks. -** -** Incremental merges happen nMin segments at a time. The segments -** to be merged are the nMin oldest segments (the ones with the smallest -** values for the _segdir.idx field) in the highest level that contains -** at least nMin segments. Multiple merges might occur in an attempt to -** write the quota of nMerge leaf blocks. -*/ -SQLITE_PRIVATE int sqlite3Fts3Incrmerge(Fts3Table *p, int nMerge, int nMin){ - int rc; /* Return code */ - int nRem = nMerge; /* Number of leaf pages yet to be written */ - Fts3MultiSegReader *pCsr; /* Cursor used to read input data */ - Fts3SegFilter *pFilter; /* Filter used with cursor pCsr */ - IncrmergeWriter *pWriter; /* Writer object */ - int nSeg = 0; /* Number of input segments */ - sqlite3_int64 iAbsLevel = 0; /* Absolute level number to work on */ - Blob hint = {0, 0, 0}; /* Hint read from %_stat table */ - int bDirtyHint = 0; /* True if blob 'hint' has been modified */ + /* Count the number of terms in the query */ + rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); + if( rc!=SQLITE_OK ) goto offsets_out; - /* Allocate space for the cursor, filter and writer objects */ - const int nAlloc = sizeof(*pCsr) + sizeof(*pFilter) + sizeof(*pWriter); - pWriter = (IncrmergeWriter *)sqlite3_malloc(nAlloc); - if( !pWriter ) return SQLITE_NOMEM; - pFilter = (Fts3SegFilter *)&pWriter[1]; - pCsr = (Fts3MultiSegReader *)&pFilter[1]; + /* Allocate the array of TermOffset iterators. */ + sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); + if( 0==sCtx.aTerm ){ + rc = SQLITE_NOMEM; + goto offsets_out; + } + sCtx.iDocid = pCsr->iPrevId; + sCtx.pCsr = pCsr; - rc = fts3IncrmergeHintLoad(p, &hint); - while( rc==SQLITE_OK && nRem>0 ){ - const i64 nMod = FTS3_SEGDIR_MAXLEVEL * p->nIndex; - sqlite3_stmt *pFindLevel = 0; /* SQL used to determine iAbsLevel */ - int bUseHint = 0; /* True if attempting to append */ - int iIdx = 0; /* Largest idx in level (iAbsLevel+1) */ + /* Loop through the table columns, appending offset information to + ** string-buffer res for each column. + */ + for(iCol=0; iCol<pTab->nColumn; iCol++){ + sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ + const char *ZDUMMY; /* Dummy argument used with xNext() */ + int NDUMMY = 0; /* Dummy argument used with xNext() */ + int iStart = 0; + int iEnd = 0; + int iCurrent = 0; + const char *zDoc; + int nDoc; - /* Search the %_segdir table for the absolute level with the smallest - ** relative level number that contains at least nMin segments, if any. - ** If one is found, set iAbsLevel to the absolute level number and - ** nSeg to nMin. If no level with at least nMin segments can be found, - ** set nSeg to -1. + /* Initialize the contents of sCtx.aTerm[] for column iCol. There is + ** no way that this operation can fail, so the return code from + ** fts3ExprIterate() can be discarded. */ - rc = fts3SqlStmt(p, SQL_FIND_MERGE_LEVEL, &pFindLevel, 0); - sqlite3_bind_int(pFindLevel, 1, nMin); - if( sqlite3_step(pFindLevel)==SQLITE_ROW ){ - iAbsLevel = sqlite3_column_int64(pFindLevel, 0); - nSeg = nMin; - }else{ - nSeg = -1; - } - rc = sqlite3_reset(pFindLevel); + sCtx.iCol = iCol; + sCtx.iTerm = 0; + (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void*)&sCtx); - /* If the hint read from the %_stat table is not empty, check if the - ** last entry in it specifies a relative level smaller than or equal - ** to the level identified by the block above (if any). If so, this - ** iteration of the loop will work on merging at the hinted level. + /* Retreive the text stored in column iCol. If an SQL NULL is stored + ** in column iCol, jump immediately to the next iteration of the loop. + ** If an OOM occurs while retrieving the data (this can happen if SQLite + ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM + ** to the caller. */ - if( rc==SQLITE_OK && hint.n ){ - int nHint = hint.n; - sqlite3_int64 iHintAbsLevel = 0; /* Hint level */ - int nHintSeg = 0; /* Hint number of segments */ - - rc = fts3IncrmergeHintPop(&hint, &iHintAbsLevel, &nHintSeg); - if( nSeg<0 || (iAbsLevel % nMod) >= (iHintAbsLevel % nMod) ){ - iAbsLevel = iHintAbsLevel; - nSeg = nHintSeg; - bUseHint = 1; - bDirtyHint = 1; - }else{ - /* This undoes the effect of the HintPop() above - so that no entry - ** is removed from the hint blob. */ - hint.n = nHint; - } - } - - /* If nSeg is less that zero, then there is no level with at least - ** nMin segments and no hint in the %_stat table. No work to do. - ** Exit early in this case. */ - if( nSeg<0 ) break; - - /* Open a cursor to iterate through the contents of the oldest nSeg - ** indexes of absolute level iAbsLevel. If this cursor is opened using - ** the 'hint' parameters, it is possible that there are less than nSeg - ** segments available in level iAbsLevel. In this case, no work is - ** done on iAbsLevel - fall through to the next iteration of the loop - ** to start work on some other level. */ - memset(pWriter, 0, nAlloc); - pFilter->flags = FTS3_SEGMENT_REQUIRE_POS; - - if( rc==SQLITE_OK ){ - rc = fts3IncrmergeOutputIdx(p, iAbsLevel, &iIdx); - assert( bUseHint==1 || bUseHint==0 ); - if( iIdx==0 || (bUseHint && iIdx==1) ){ - int bIgnore = 0; - rc = fts3SegmentIsMaxLevel(p, iAbsLevel+1, &bIgnore); - if( bIgnore ){ - pFilter->flags |= FTS3_SEGMENT_IGNORE_EMPTY; - } + zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); + nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); + if( zDoc==0 ){ + if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ + continue; } + rc = SQLITE_NOMEM; + goto offsets_out; } - if( rc==SQLITE_OK ){ - rc = fts3IncrmergeCsr(p, iAbsLevel, nSeg, pCsr); - } - if( SQLITE_OK==rc && pCsr->nSegment==nSeg - && SQLITE_OK==(rc = sqlite3Fts3SegReaderStart(p, pCsr, pFilter)) - && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pCsr)) - ){ - if( bUseHint && iIdx>0 ){ - const char *zKey = pCsr->zTerm; - int nKey = pCsr->nTerm; - rc = fts3IncrmergeLoad(p, iAbsLevel, iIdx-1, zKey, nKey, pWriter); - }else{ - rc = fts3IncrmergeWriter(p, iAbsLevel, iIdx, pCsr, pWriter); - } + /* Initialize a tokenizer iterator to iterate through column iCol. */ + rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, + zDoc, nDoc, &pC + ); + if( rc!=SQLITE_OK ) goto offsets_out; - if( rc==SQLITE_OK && pWriter->nLeafEst ){ - fts3LogMerge(nSeg, iAbsLevel); - do { - rc = fts3IncrmergeAppend(p, pWriter, pCsr); - if( rc==SQLITE_OK ) rc = sqlite3Fts3SegReaderStep(p, pCsr); - if( pWriter->nWork>=nRem && rc==SQLITE_ROW ) rc = SQLITE_OK; - }while( rc==SQLITE_ROW ); + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + while( rc==SQLITE_OK ){ + int i; /* Used to loop through terms */ + int iMinPos = 0x7FFFFFFF; /* Position of next token */ + TermOffset *pTerm = 0; /* TermOffset associated with next token */ - /* Update or delete the input segments */ - if( rc==SQLITE_OK ){ - nRem -= (1 + pWriter->nWork); - rc = fts3IncrmergeChomp(p, iAbsLevel, pCsr, &nSeg); - if( nSeg!=0 ){ - bDirtyHint = 1; - fts3IncrmergeHintPush(&hint, iAbsLevel, nSeg, &rc); - } + for(i=0; i<nToken; i++){ + TermOffset *pT = &sCtx.aTerm[i]; + if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ + iMinPos = pT->iPos-pT->iOff; + pTerm = pT; } } - if( nSeg!=0 ){ - pWriter->nLeafData = pWriter->nLeafData * -1; - } - fts3IncrmergeRelease(p, pWriter, &rc); - if( nSeg==0 && pWriter->bNoLeafData==0 ){ - fts3PromoteSegments(p, iAbsLevel+1, pWriter->nLeafData); + if( !pTerm ){ + /* All offsets for this column have been gathered. */ + rc = SQLITE_DONE; + }else{ + assert( iCurrent<=iMinPos ); + if( 0==(0xFE&*pTerm->pList) ){ + pTerm->pList = 0; + }else{ + fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); + } + while( rc==SQLITE_OK && iCurrent<iMinPos ){ + rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); + } + if( rc==SQLITE_OK ){ + char aBuffer[64]; + sqlite3_snprintf(sizeof(aBuffer), aBuffer, + "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart + ); + rc = fts3StringAppend(&res, aBuffer, -1); + }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ + rc = FTS_CORRUPT_VTAB; + } } } + if( rc==SQLITE_DONE ){ + rc = SQLITE_OK; + } - sqlite3Fts3SegReaderFinish(pCsr); + pMod->xClose(pC); + if( rc!=SQLITE_OK ) goto offsets_out; } - /* Write the hint values into the %_stat table for the next incr-merger */ - if( bDirtyHint && rc==SQLITE_OK ){ - rc = fts3IncrmergeHintStore(p, &hint); + offsets_out: + sqlite3_free(sCtx.aTerm); + assert( rc!=SQLITE_DONE ); + sqlite3Fts3SegmentsClose(pTab); + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + sqlite3_free(res.z); + }else{ + sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); } - - sqlite3_free(pWriter); - sqlite3_free(hint.a); - return rc; -} - -/* -** Convert the text beginning at *pz into an integer and return -** its value. Advance *pz to point to the first character past -** the integer. -*/ -static int fts3Getint(const char **pz){ - const char *z = *pz; - int i = 0; - while( (*z)>='0' && (*z)<='9' ) i = 10*i + *(z++) - '0'; - *pz = z; - return i; + return; } /* -** Process statements of the form: -** -** INSERT INTO table(table) VALUES('merge=A,B'); -** -** A and B are integers that decode to be the number of leaf pages -** written for the merge, and the minimum number of segments on a level -** before it will be selected for a merge, respectively. +** Implementation of matchinfo() function. */ -static int fts3DoIncrmerge( - Fts3Table *p, /* FTS3 table handle */ - const char *zParam /* Nul-terminated string containing "A,B" */ +SQLITE_PRIVATE void sqlite3Fts3Matchinfo( + sqlite3_context *pContext, /* Function call context */ + Fts3Cursor *pCsr, /* FTS3 table cursor */ + const char *zArg /* Second arg to matchinfo() function */ ){ - int rc; - int nMin = (FTS3_MERGE_COUNT / 2); - int nMerge = 0; - const char *z = zParam; - - /* Read the first integer value */ - nMerge = fts3Getint(&z); + Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; + const char *zFormat; - /* If the first integer value is followed by a ',', read the second - ** integer value. */ - if( z[0]==',' && z[1]!='\0' ){ - z++; - nMin = fts3Getint(&z); + if( zArg ){ + zFormat = zArg; + }else{ + zFormat = FTS3_MATCHINFO_DEFAULT; } - if( z[0]!='\0' || nMin<2 ){ - rc = SQLITE_ERROR; + if( !pCsr->pExpr ){ + sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); + return; }else{ - rc = SQLITE_OK; - if( !p->bHasStat ){ - assert( p->bFts4==0 ); - sqlite3Fts3CreateStatTable(&rc, p); - } - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3Incrmerge(p, nMerge, nMin); - } - sqlite3Fts3SegmentsClose(p); + /* Retrieve matchinfo() data. */ + fts3GetMatchinfo(pContext, pCsr, zFormat); + sqlite3Fts3SegmentsClose(pTab); } - return rc; } +#endif + +/************** End of fts3_snippet.c ****************************************/ +/************** Begin file fts3_unicode.c ************************************/ /* -** Process statements of the form: +** 2012 May 24 ** -** INSERT INTO table(table) VALUES('automerge=X'); +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: ** -** where X is an integer. X==0 means to turn automerge off. X!=0 means -** turn it on. The setting is persistent. +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +** +** Implementation of the "unicode" full-text-search tokenizer. */ -static int fts3DoAutoincrmerge( - Fts3Table *p, /* FTS3 table handle */ - const char *zParam /* Nul-terminated string containing boolean */ -){ - int rc = SQLITE_OK; - sqlite3_stmt *pStmt = 0; - p->nAutoincrmerge = fts3Getint(&zParam); - if( p->nAutoincrmerge==1 || p->nAutoincrmerge>FTS3_MERGE_COUNT ){ - p->nAutoincrmerge = 8; - } - if( !p->bHasStat ){ - assert( p->bFts4==0 ); - sqlite3Fts3CreateStatTable(&rc, p); - if( rc ) return rc; - } - rc = fts3SqlStmt(p, SQL_REPLACE_STAT, &pStmt, 0); - if( rc ) return rc; - sqlite3_bind_int(pStmt, 1, FTS_STAT_AUTOINCRMERGE); - sqlite3_bind_int(pStmt, 2, p->nAutoincrmerge); - sqlite3_step(pStmt); - rc = sqlite3_reset(pStmt); - return rc; -} + +#ifndef SQLITE_DISABLE_FTS3_UNICODE + +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) + +/* #include <assert.h> */ +/* #include <stdlib.h> */ +/* #include <stdio.h> */ +/* #include <string.h> */ + +/* #include "fts3_tokenizer.h" */ /* -** Return a 64-bit checksum for the FTS index entry specified by the -** arguments to this function. +** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied +** from the sqlite3 source file utf.c. If this file is compiled as part +** of the amalgamation, they are not required. */ -static u64 fts3ChecksumEntry( - const char *zTerm, /* Pointer to buffer containing term */ - int nTerm, /* Size of zTerm in bytes */ - int iLangid, /* Language id for current row */ - int iIndex, /* Index (0..Fts3Table.nIndex-1) */ - i64 iDocid, /* Docid for current row. */ - int iCol, /* Column number */ - int iPos /* Position */ -){ - int i; - u64 ret = (u64)iDocid; +#ifndef SQLITE_AMALGAMATION - ret += (ret<<3) + iLangid; - ret += (ret<<3) + iIndex; - ret += (ret<<3) + iCol; - ret += (ret<<3) + iPos; - for(i=0; i<nTerm; i++) ret += (ret<<3) + zTerm[i]; +static const unsigned char sqlite3Utf8Trans1[] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, +}; - return ret; +#define READ_UTF8(zIn, zTerm, c) \ + c = *(zIn++); \ + if( c>=0xc0 ){ \ + c = sqlite3Utf8Trans1[c-0xc0]; \ + while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ + c = (c<<6) + (0x3f & *(zIn++)); \ + } \ + if( c<0x80 \ + || (c&0xFFFFF800)==0xD800 \ + || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + } + +#define WRITE_UTF8(zOut, c) { \ + if( c<0x00080 ){ \ + *zOut++ = (u8)(c&0xFF); \ + } \ + else if( c<0x00800 ){ \ + *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ + else if( c<0x10000 ){ \ + *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + }else{ \ + *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ + *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ + *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ + *zOut++ = 0x80 + (u8)(c & 0x3F); \ + } \ +} + +#endif /* ifndef SQLITE_AMALGAMATION */ + +typedef struct unicode_tokenizer unicode_tokenizer; +typedef struct unicode_cursor unicode_cursor; + +struct unicode_tokenizer { + sqlite3_tokenizer base; + int bRemoveDiacritic; + int nException; + int *aiException; +}; + +struct unicode_cursor { + sqlite3_tokenizer_cursor base; + const unsigned char *aInput; /* Input text being tokenized */ + int nInput; /* Size of aInput[] in bytes */ + int iOff; /* Current offset within aInput[] */ + int iToken; /* Index of next token to be returned */ + char *zToken; /* storage for current token */ + int nAlloc; /* space allocated at zToken */ +}; + + +/* +** Destroy a tokenizer allocated by unicodeCreate(). +*/ +static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ + if( pTokenizer ){ + unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; + sqlite3_free(p->aiException); + sqlite3_free(p); + } + return SQLITE_OK; } /* -** Return a checksum of all entries in the FTS index that correspond to -** language id iLangid. The checksum is calculated by XORing the checksums -** of each individual entry (see fts3ChecksumEntry()) together. +** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE +** statement has specified that the tokenizer for this table shall consider +** all characters in string zIn/nIn to be separators (if bAlnum==0) or +** token characters (if bAlnum==1). ** -** If successful, the checksum value is returned and *pRc set to SQLITE_OK. -** Otherwise, if an error occurs, *pRc is set to an SQLite error code. The -** return value is undefined in this case. +** For each codepoint in the zIn/nIn string, this function checks if the +** sqlite3FtsUnicodeIsalnum() function already returns the desired result. +** If so, no action is taken. Otherwise, the codepoint is added to the +** unicode_tokenizer.aiException[] array. For the purposes of tokenization, +** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all +** codepoints in the aiException[] array. +** +** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() +** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. +** It is not possible to change the behavior of the tokenizer with respect +** to these codepoints. */ -static u64 fts3ChecksumIndex( - Fts3Table *p, /* FTS3 table handle */ - int iLangid, /* Language id to return cksum for */ - int iIndex, /* Index to cksum (0..p->nIndex-1) */ - int *pRc /* OUT: Return code */ +static int unicodeAddExceptions( + unicode_tokenizer *p, /* Tokenizer to add exceptions to */ + int bAlnum, /* Replace Isalnum() return value with this */ + const char *zIn, /* Array of characters to make exceptions */ + int nIn /* Length of z in bytes */ ){ - Fts3SegFilter filter; - Fts3MultiSegReader csr; - int rc; - u64 cksum = 0; - - assert( *pRc==SQLITE_OK ); + const unsigned char *z = (const unsigned char *)zIn; + const unsigned char *zTerm = &z[nIn]; + int iCode; + int nEntry = 0; - memset(&filter, 0, sizeof(filter)); - memset(&csr, 0, sizeof(csr)); - filter.flags = FTS3_SEGMENT_REQUIRE_POS|FTS3_SEGMENT_IGNORE_EMPTY; - filter.flags |= FTS3_SEGMENT_SCAN; + assert( bAlnum==0 || bAlnum==1 ); - rc = sqlite3Fts3SegReaderCursor( - p, iLangid, iIndex, FTS3_SEGCURSOR_ALL, 0, 0, 0, 1,&csr - ); - if( rc==SQLITE_OK ){ - rc = sqlite3Fts3SegReaderStart(p, &csr, &filter); + while( z<zTerm ){ + READ_UTF8(z, zTerm, iCode); + assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum + && sqlite3FtsUnicodeIsdiacritic(iCode)==0 + ){ + nEntry++; + } } - if( rc==SQLITE_OK ){ - while( SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, &csr)) ){ - char *pCsr = csr.aDoclist; - char *pEnd = &pCsr[csr.nDoclist]; + if( nEntry ){ + int *aNew; /* New aiException[] array */ + int nNew; /* Number of valid entries in array aNew[] */ - i64 iDocid = 0; - i64 iCol = 0; - i64 iPos = 0; + aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int)); + if( aNew==0 ) return SQLITE_NOMEM; + nNew = p->nException; - pCsr += sqlite3Fts3GetVarint(pCsr, &iDocid); - while( pCsr<pEnd ){ - i64 iVal = 0; - pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); - if( pCsr<pEnd ){ - if( iVal==0 || iVal==1 ){ - iCol = 0; - iPos = 0; - if( iVal ){ - pCsr += sqlite3Fts3GetVarint(pCsr, &iCol); - }else{ - pCsr += sqlite3Fts3GetVarint(pCsr, &iVal); - iDocid += iVal; - } - }else{ - iPos += (iVal - 2); - cksum = cksum ^ fts3ChecksumEntry( - csr.zTerm, csr.nTerm, iLangid, iIndex, iDocid, - (int)iCol, (int)iPos - ); - } - } + z = (const unsigned char *)zIn; + while( z<zTerm ){ + READ_UTF8(z, zTerm, iCode); + if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum + && sqlite3FtsUnicodeIsdiacritic(iCode)==0 + ){ + int i, j; + for(i=0; i<nNew && aNew[i]<iCode; i++); + for(j=nNew; j>i; j--) aNew[j] = aNew[j-1]; + aNew[i] = iCode; + nNew++; } } + p->aiException = aNew; + p->nException = nNew; } - sqlite3Fts3SegReaderFinish(&csr); - *pRc = rc; - return cksum; + return SQLITE_OK; } /* -** Check if the contents of the FTS index match the current contents of the -** content table. If no error occurs and the contents do match, set *pbOk -** to true and return SQLITE_OK. Or if the contents do not match, set *pbOk -** to false before returning. -** -** If an error occurs (e.g. an OOM or IO error), return an SQLite error -** code. The final value of *pbOk is undefined in this case. +** Return true if the p->aiException[] array contains the value iCode. */ -static int fts3IntegrityCheck(Fts3Table *p, int *pbOk){ - int rc = SQLITE_OK; /* Return code */ - u64 cksum1 = 0; /* Checksum based on FTS index contents */ - u64 cksum2 = 0; /* Checksum based on %_content contents */ - sqlite3_stmt *pAllLangid = 0; /* Statement to return all language-ids */ +static int unicodeIsException(unicode_tokenizer *p, int iCode){ + if( p->nException>0 ){ + int *a = p->aiException; + int iLo = 0; + int iHi = p->nException-1; - /* This block calculates the checksum according to the FTS index. */ - rc = fts3SqlStmt(p, SQL_SELECT_ALL_LANGID, &pAllLangid, 0); - if( rc==SQLITE_OK ){ - int rc2; - sqlite3_bind_int(pAllLangid, 1, p->iPrevLangid); - sqlite3_bind_int(pAllLangid, 2, p->nIndex); - while( rc==SQLITE_OK && sqlite3_step(pAllLangid)==SQLITE_ROW ){ - int iLangid = sqlite3_column_int(pAllLangid, 0); - int i; - for(i=0; i<p->nIndex; i++){ - cksum1 = cksum1 ^ fts3ChecksumIndex(p, iLangid, i, &rc); + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( iCode==a[iTest] ){ + return 1; + }else if( iCode>a[iTest] ){ + iLo = iTest+1; + }else{ + iHi = iTest-1; } } - rc2 = sqlite3_reset(pAllLangid); - if( rc==SQLITE_OK ) rc = rc2; } - /* This block calculates the checksum according to the %_content table */ - if( rc==SQLITE_OK ){ - sqlite3_tokenizer_module const *pModule = p->pTokenizer->pModule; - sqlite3_stmt *pStmt = 0; - char *zSql; - - zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist); - if( !zSql ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); - sqlite3_free(zSql); - } + return 0; +} - while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ - i64 iDocid = sqlite3_column_int64(pStmt, 0); - int iLang = langidFromSelect(p, pStmt); - int iCol; +/* +** Return true if, for the purposes of tokenization, codepoint iCode is +** considered a token character (not a separator). +*/ +static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ + assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); + return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); +} - for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){ - if( p->abNotindexed[iCol]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pStmt, iCol+1); - int nText = sqlite3_column_bytes(pStmt, iCol+1); - sqlite3_tokenizer_cursor *pT = 0; +/* +** Create a new tokenizer instance. +*/ +static int unicodeCreate( + int nArg, /* Size of array argv[] */ + const char * const *azArg, /* Tokenizer creation arguments */ + sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ +){ + unicode_tokenizer *pNew; /* New tokenizer object */ + int i; + int rc = SQLITE_OK; - rc = sqlite3Fts3OpenTokenizer(p->pTokenizer, iLang, zText, nText,&pT); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ + pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); + if( pNew==NULL ) return SQLITE_NOMEM; + memset(pNew, 0, sizeof(unicode_tokenizer)); + pNew->bRemoveDiacritic = 1; - rc = pModule->xNext(pT, &zToken, &nToken, &iDum1, &iDum2, &iPos); - if( rc==SQLITE_OK ){ - int i; - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, nToken, iLang, 0, iDocid, iCol, iPos - ); - for(i=1; i<p->nIndex; i++){ - if( p->aIndex[i].nPrefix<=nToken ){ - cksum2 = cksum2 ^ fts3ChecksumEntry( - zToken, p->aIndex[i].nPrefix, iLang, i, iDocid, iCol, iPos - ); - } - } - } - } - if( pT ) pModule->xClose(pT); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } - } - } + for(i=0; rc==SQLITE_OK && i<nArg; i++){ + const char *z = azArg[i]; + int n = (int)strlen(z); - sqlite3_finalize(pStmt); + if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){ + pNew->bRemoveDiacritic = 1; + } + else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ + pNew->bRemoveDiacritic = 0; + } + else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); + } + else if( n>=11 && memcmp("separators=", z, 11)==0 ){ + rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); + } + else{ + /* Unrecognized argument */ + rc = SQLITE_ERROR; + } } - *pbOk = (cksum1==cksum2); + if( rc!=SQLITE_OK ){ + unicodeDestroy((sqlite3_tokenizer *)pNew); + pNew = 0; + } + *pp = (sqlite3_tokenizer *)pNew; return rc; } /* -** Run the integrity-check. If no error occurs and the current contents of -** the FTS index are correct, return SQLITE_OK. Or, if the contents of the -** FTS index are incorrect, return SQLITE_CORRUPT_VTAB. -** -** Or, if an error (e.g. an OOM or IO error) occurs, return an SQLite -** error code. -** -** The integrity-check works as follows. For each token and indexed token -** prefix in the document set, a 64-bit checksum is calculated (by code -** in fts3ChecksumEntry()) based on the following: -** -** + The index number (0 for the main index, 1 for the first prefix -** index etc.), -** + The token (or token prefix) text itself, -** + The language-id of the row it appears in, -** + The docid of the row it appears in, -** + The column it appears in, and -** + The tokens position within that column. -** -** The checksums for all entries in the index are XORed together to create -** a single checksum for the entire index. -** -** The integrity-check code calculates the same checksum in two ways: -** -** 1. By scanning the contents of the FTS index, and -** 2. By scanning and tokenizing the content table. -** -** If the two checksums are identical, the integrity-check is deemed to have -** passed. +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. */ -static int fts3DoIntegrityCheck( - Fts3Table *p /* FTS3 table handle */ +static int unicodeOpen( + sqlite3_tokenizer *p, /* The tokenizer */ + const char *aInput, /* Input string */ + int nInput, /* Size of string aInput in bytes */ + sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ ){ - int rc; - int bOk = 0; - rc = fts3IntegrityCheck(p, &bOk); - if( rc==SQLITE_OK && bOk==0 ) rc = FTS_CORRUPT_VTAB; - return rc; -} - -/* -** Handle a 'special' INSERT of the form: -** -** "INSERT INTO tbl(tbl) VALUES(<expr>)" -** -** Argument pVal contains the result of <expr>. Currently the only -** meaningful value to insert is the text 'optimize'. -*/ -static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){ - int rc; /* Return Code */ - const char *zVal = (const char *)sqlite3_value_text(pVal); - int nVal = sqlite3_value_bytes(pVal); + unicode_cursor *pCsr; - if( !zVal ){ + pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); + if( pCsr==0 ){ return SQLITE_NOMEM; - }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){ - rc = fts3DoOptimize(p, 0); - }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){ - rc = fts3DoRebuild(p); - }else if( nVal==15 && 0==sqlite3_strnicmp(zVal, "integrity-check", 15) ){ - rc = fts3DoIntegrityCheck(p); - }else if( nVal>6 && 0==sqlite3_strnicmp(zVal, "merge=", 6) ){ - rc = fts3DoIncrmerge(p, &zVal[6]); - }else if( nVal>10 && 0==sqlite3_strnicmp(zVal, "automerge=", 10) ){ - rc = fts3DoAutoincrmerge(p, &zVal[10]); -#ifdef SQLITE_TEST - }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){ - p->nNodeSize = atoi(&zVal[9]); - rc = SQLITE_OK; - }else if( nVal>11 && 0==sqlite3_strnicmp(zVal, "maxpending=", 9) ){ - p->nMaxPendingData = atoi(&zVal[11]); - rc = SQLITE_OK; - }else if( nVal>21 && 0==sqlite3_strnicmp(zVal, "test-no-incr-doclist=", 21) ){ - p->bNoIncrDoclist = atoi(&zVal[21]); - rc = SQLITE_OK; -#endif + } + memset(pCsr, 0, sizeof(unicode_cursor)); + + pCsr->aInput = (const unsigned char *)aInput; + if( aInput==0 ){ + pCsr->nInput = 0; + }else if( nInput<0 ){ + pCsr->nInput = (int)strlen(aInput); }else{ - rc = SQLITE_ERROR; + pCsr->nInput = nInput; } - return rc; + *pp = &pCsr->base; + UNUSED_PARAMETER(p); + return SQLITE_OK; } -#ifndef SQLITE_DISABLE_FTS4_DEFERRED /* -** Delete all cached deferred doclists. Deferred doclists are cached -** (allocated) by the sqlite3Fts3CacheDeferredDoclists() function. +** Close a tokenization cursor previously opened by a call to +** simpleOpen() above. */ -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredDoclists(Fts3Cursor *pCsr){ - Fts3DeferredToken *pDef; - for(pDef=pCsr->pDeferred; pDef; pDef=pDef->pNext){ - fts3PendingListDelete(pDef->pList); - pDef->pList = 0; - } +static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ + unicode_cursor *pCsr = (unicode_cursor *) pCursor; + sqlite3_free(pCsr->zToken); + sqlite3_free(pCsr); + return SQLITE_OK; } /* -** Free all entries in the pCsr->pDeffered list. Entries are added to -** this list using sqlite3Fts3DeferToken(). +** Extract the next token from a tokenization cursor. The cursor must +** have been opened by a prior call to simpleOpen(). */ -SQLITE_PRIVATE void sqlite3Fts3FreeDeferredTokens(Fts3Cursor *pCsr){ - Fts3DeferredToken *pDef; - Fts3DeferredToken *pNext; - for(pDef=pCsr->pDeferred; pDef; pDef=pNext){ - pNext = pDef->pNext; - fts3PendingListDelete(pDef->pList); - sqlite3_free(pDef); - } - pCsr->pDeferred = 0; -} +static int unicodeNext( + sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ + const char **paToken, /* OUT: Token text */ + int *pnToken, /* OUT: Number of bytes at *paToken */ + int *piStart, /* OUT: Starting offset of token */ + int *piEnd, /* OUT: Ending offset of token */ + int *piPos /* OUT: Position integer of token */ +){ + unicode_cursor *pCsr = (unicode_cursor *)pC; + unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); + int iCode = 0; + char *zOut; + const unsigned char *z = &pCsr->aInput[pCsr->iOff]; + const unsigned char *zStart = z; + const unsigned char *zEnd; + const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; -/* -** Generate deferred-doclists for all tokens in the pCsr->pDeferred list -** based on the row that pCsr currently points to. -** -** A deferred-doclist is like any other doclist with position information -** included, except that it only contains entries for a single row of the -** table, not for all rows. -*/ -SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){ - int rc = SQLITE_OK; /* Return code */ - if( pCsr->pDeferred ){ - int i; /* Used to iterate through table columns */ - sqlite3_int64 iDocid; /* Docid of the row pCsr points to */ - Fts3DeferredToken *pDef; /* Used to iterate through deferred tokens */ - - Fts3Table *p = (Fts3Table *)pCsr->base.pVtab; - sqlite3_tokenizer *pT = p->pTokenizer; - sqlite3_tokenizer_module const *pModule = pT->pModule; - - assert( pCsr->isRequireSeek==0 ); - iDocid = sqlite3_column_int64(pCsr->pStmt, 0); - - for(i=0; i<p->nColumn && rc==SQLITE_OK; i++){ - if( p->abNotindexed[i]==0 ){ - const char *zText = (const char *)sqlite3_column_text(pCsr->pStmt, i+1); - sqlite3_tokenizer_cursor *pTC = 0; + /* Scan past any delimiter characters before the start of the next token. + ** Return SQLITE_DONE early if this takes us all the way to the end of + ** the input. */ + while( z<zTerm ){ + READ_UTF8(z, zTerm, iCode); + if( unicodeIsAlnum(p, iCode) ) break; + zStart = z; + } + if( zStart>=zTerm ) return SQLITE_DONE; - rc = sqlite3Fts3OpenTokenizer(pT, pCsr->iLangid, zText, -1, &pTC); - while( rc==SQLITE_OK ){ - char const *zToken; /* Buffer containing token */ - int nToken = 0; /* Number of bytes in token */ - int iDum1 = 0, iDum2 = 0; /* Dummy variables */ - int iPos = 0; /* Position of token in zText */ + zOut = pCsr->zToken; + do { + int iOut; - rc = pModule->xNext(pTC, &zToken, &nToken, &iDum1, &iDum2, &iPos); - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - Fts3PhraseToken *pPT = pDef->pToken; - if( (pDef->iCol>=p->nColumn || pDef->iCol==i) - && (pPT->bFirst==0 || iPos==0) - && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken)) - && (0==memcmp(zToken, pPT->z, pPT->n)) - ){ - fts3PendingListAppend(&pDef->pList, iDocid, i, iPos, &rc); - } - } - } - if( pTC ) pModule->xClose(pTC); - if( rc==SQLITE_DONE ) rc = SQLITE_OK; - } + /* Grow the output buffer if required. */ + if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ + char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); + if( !zNew ) return SQLITE_NOMEM; + zOut = &zNew[zOut - pCsr->zToken]; + pCsr->zToken = zNew; + pCsr->nAlloc += 64; } - for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){ - if( pDef->pList ){ - rc = fts3PendingListAppendVarint(&pDef->pList, 0); - } + /* Write the folded case of the last character read to the output */ + zEnd = z; + iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); + if( iOut ){ + WRITE_UTF8(zOut, iOut); } - } - return rc; + /* If the cursor is not at EOF, read the next character */ + if( z>=zTerm ) break; + READ_UTF8(z, zTerm, iCode); + }while( unicodeIsAlnum(p, iCode) + || sqlite3FtsUnicodeIsdiacritic(iCode) + ); + + /* Set the output variables and return. */ + pCsr->iOff = (int)(z - pCsr->aInput); + *paToken = pCsr->zToken; + *pnToken = (int)(zOut - pCsr->zToken); + *piStart = (int)(zStart - pCsr->aInput); + *piEnd = (int)(zEnd - pCsr->aInput); + *piPos = pCsr->iToken++; + return SQLITE_OK; } -SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList( - Fts3DeferredToken *p, - char **ppData, - int *pnData -){ - char *pRet; - int nSkip; - sqlite3_int64 dummy; +/* +** Set *ppModule to a pointer to the sqlite3_tokenizer_module +** structure for the unicode tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ + static const sqlite3_tokenizer_module module = { + 0, + unicodeCreate, + unicodeDestroy, + unicodeOpen, + unicodeClose, + unicodeNext, + 0, + }; + *ppModule = &module; +} - *ppData = 0; - *pnData = 0; +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ +#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ - if( p->pList==0 ){ - return SQLITE_OK; - } +/************** End of fts3_unicode.c ****************************************/ +/************** Begin file fts3_unicode2.c ***********************************/ +/* +** 2012 May 25 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +****************************************************************************** +*/ - pRet = (char *)sqlite3_malloc(p->pList->nData); - if( !pRet ) return SQLITE_NOMEM; +/* +** DO NOT EDIT THIS MACHINE GENERATED FILE. +*/ - nSkip = sqlite3Fts3GetVarint(p->pList->aData, &dummy); - *pnData = p->pList->nData - nSkip; - *ppData = pRet; - - memcpy(pRet, &p->pList->aData[nSkip], *pnData); - return SQLITE_OK; -} +#ifndef SQLITE_DISABLE_FTS3_UNICODE +#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) + +/* #include <assert.h> */ /* -** Add an entry for token pToken to the pCsr->pDeferred list. +** Return true if the argument corresponds to a unicode codepoint +** classified as either a letter or a number. Otherwise false. +** +** The results are undefined if the value passed to this function +** is less than zero. */ -SQLITE_PRIVATE int sqlite3Fts3DeferToken( - Fts3Cursor *pCsr, /* Fts3 table cursor */ - Fts3PhraseToken *pToken, /* Token to defer */ - int iCol /* Column that token must appear in (or -1) */ -){ - Fts3DeferredToken *pDeferred; - pDeferred = sqlite3_malloc(sizeof(*pDeferred)); - if( !pDeferred ){ - return SQLITE_NOMEM; - } - memset(pDeferred, 0, sizeof(*pDeferred)); - pDeferred->pToken = pToken; - pDeferred->pNext = pCsr->pDeferred; - pDeferred->iCol = iCol; - pCsr->pDeferred = pDeferred; - - assert( pToken->pDeferred==0 ); - pToken->pDeferred = pDeferred; +SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){ + /* Each unsigned integer in the following array corresponds to a contiguous + ** range of unicode codepoints that are not either letters or numbers (i.e. + ** codepoints for which this function should return 0). + ** + ** The most significant 22 bits in each 32-bit value contain the first + ** codepoint in the range. The least significant 10 bits are used to store + ** the size of the range (always at least 1). In other words, the value + ** ((C<<22) + N) represents a range of N codepoints starting with codepoint + ** C. It is not possible to represent a range larger than 1023 codepoints + ** using this format. + */ + static const unsigned int aEntry[] = { + 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, + 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, + 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, + 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, + 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, + 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, + 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, + 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, + 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, + 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, + 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, + 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, + 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, + 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, + 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, + 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, + 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, + 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, + 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, + 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, + 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, + 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, + 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, + 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, + 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, + 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, + 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, + 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, + 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, + 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, + 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, + 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, + 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, + 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, + 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, + 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, + 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, + 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, + 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, + 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, + 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, + 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, + 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, + 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, + 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, + 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, + 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, + 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, + 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, + 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, + 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, + 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, + 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, + 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, + 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, + 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, + 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, + 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, + 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, + 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, + 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, + 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, + 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, + 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, + 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, + 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, + 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, + 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, + 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, + 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, + 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, + 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, + 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, + 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, + 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, + 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, + 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, + 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, + 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, + 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, + 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, + 0x380400F0, + }; + static const unsigned int aAscii[4] = { + 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, + }; - return SQLITE_OK; + if( c<128 ){ + return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); + }else if( c<(1<<22) ){ + unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; + int iRes = 0; + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aEntry[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; + } + } + assert( aEntry[0]<key ); + assert( key>=aEntry[iRes] ); + return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + } + return 1; } -#endif + /* -** SQLite value pRowid contains the rowid of a row that may or may not be -** present in the FTS3 table. If it is, delete it and adjust the contents -** of subsiduary data structures accordingly. +** If the argument is a codepoint corresponding to a lowercase letter +** in the ASCII range with a diacritic added, return the codepoint +** of the ASCII letter only. For example, if passed 235 - "LATIN +** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER +** E"). The resuls of passing a codepoint that corresponds to an +** uppercase letter are undefined. */ -static int fts3DeleteByRowid( - Fts3Table *p, - sqlite3_value *pRowid, - int *pnChng, /* IN/OUT: Decrement if row is deleted */ - u32 *aSzDel -){ - int rc = SQLITE_OK; /* Return code */ - int bFound = 0; /* True if *pRowid really is in the table */ +static int remove_diacritic(int c){ + unsigned short aDia[] = { + 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, + 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, + 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, + 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, + 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, + 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, + 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, + 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, + 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, + 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, + 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, + 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, + 62924, 63050, 63082, 63274, 63390, + }; + char aChar[] = { + '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', + 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', + 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', + 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', + 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', + 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', + 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', + 'e', 'i', 'o', 'u', 'y', + }; - fts3DeleteTerms(&rc, p, pRowid, aSzDel, &bFound); - if( bFound && rc==SQLITE_OK ){ - int isEmpty = 0; /* Deleting *pRowid leaves the table empty */ - rc = fts3IsEmpty(p, pRowid, &isEmpty); - if( rc==SQLITE_OK ){ - if( isEmpty ){ - /* Deleting this row means the whole table is empty. In this case - ** delete the contents of all three tables and throw away any - ** data in the pendingTerms hash table. */ - rc = fts3DeleteAll(p, 1); - *pnChng = 0; - memset(aSzDel, 0, sizeof(u32) * (p->nColumn+1) * 2); - }else{ - *pnChng = *pnChng - 1; - if( p->zContentTbl==0 ){ - fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid); - } - if( p->bHasDocsize ){ - fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid); - } - } + unsigned int key = (((unsigned int)c)<<3) | 0x00000007; + int iRes = 0; + int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; + int iLo = 0; + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + if( key >= aDia[iTest] ){ + iRes = iTest; + iLo = iTest+1; + }else{ + iHi = iTest-1; } } - - return rc; + assert( key>=aDia[iRes] ); + return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); } + /* -** This function does the work for the xUpdate method of FTS3 virtual -** tables. The schema of the virtual table being: -** -** CREATE TABLE <table name>( -** <user columns>, -** <table name> HIDDEN, -** docid HIDDEN, -** <langid> HIDDEN -** ); -** -** +** Return true if the argument interpreted as a unicode codepoint +** is a diacritical modifier character. */ -SQLITE_PRIVATE int sqlite3Fts3UpdateMethod( - sqlite3_vtab *pVtab, /* FTS3 vtab object */ - int nArg, /* Size of argument array */ - sqlite3_value **apVal, /* Array of arguments */ - sqlite_int64 *pRowid /* OUT: The affected (or effected) rowid */ -){ - Fts3Table *p = (Fts3Table *)pVtab; - int rc = SQLITE_OK; /* Return Code */ - int isRemove = 0; /* True for an UPDATE or DELETE */ - u32 *aSzIns = 0; /* Sizes of inserted documents */ - u32 *aSzDel = 0; /* Sizes of deleted documents */ - int nChng = 0; /* Net change in number of documents */ - int bInsertDone = 0; - - /* At this point it must be known if the %_stat table exists or not. - ** So bHasStat may not be 2. */ - assert( p->bHasStat==0 || p->bHasStat==1 ); +SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){ + unsigned int mask0 = 0x08029FDF; + unsigned int mask1 = 0x000361F8; + if( c<768 || c>817 ) return 0; + return (c < 768+32) ? + (mask0 & (1 << (c-768))) : + (mask1 & (1 << (c-768-32))); +} - assert( p->pSegments==0 ); - assert( - nArg==1 /* DELETE operations */ - || nArg==(2 + p->nColumn + 3) /* INSERT or UPDATE operations */ - ); - /* Check for a "special" INSERT operation. One of the form: +/* +** Interpret the argument as a unicode codepoint. If the codepoint +** is an upper case character that has a lower case equivalent, +** return the codepoint corresponding to the lower case version. +** Otherwise, return a copy of the argument. +** +** The results are undefined if the value passed to this function +** is less than zero. +*/ +SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ + /* Each entry in the following array defines a rule for folding a range + ** of codepoints to lower case. The rule applies to a range of nRange + ** codepoints starting at codepoint iCode. ** - ** INSERT INTO xyz(xyz) VALUES('command'); + ** If the least significant bit in flags is clear, then the rule applies + ** to all nRange codepoints (i.e. all nRange codepoints are upper case and + ** need to be folded). Or, if it is set, then the rule only applies to + ** every second codepoint in the range, starting with codepoint C. + ** + ** The 7 most significant bits in flags are an index into the aiOff[] + ** array. If a specific codepoint C does require folding, then its lower + ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). + ** + ** The contents of this array are generated by parsing the CaseFolding.txt + ** file distributed as part of the "Unicode Character Database". See + ** http://www.unicode.org for details. */ - if( nArg>1 - && sqlite3_value_type(apVal[0])==SQLITE_NULL - && sqlite3_value_type(apVal[p->nColumn+2])!=SQLITE_NULL - ){ - rc = fts3SpecialInsert(p, apVal[p->nColumn+2]); - goto update_out; - } - - if( nArg>1 && sqlite3_value_int(apVal[2 + p->nColumn + 2])<0 ){ - rc = SQLITE_CONSTRAINT; - goto update_out; - } + static const struct TableEntry { + unsigned short iCode; + unsigned char flags; + unsigned char nRange; + } aEntry[] = { + {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, + {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, + {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, + {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, + {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, + {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, + {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, + {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, + {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, + {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, + {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, + {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, + {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, + {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, + {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, + {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, + {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, + {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, + {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, + {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, + {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, + {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, + {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, + {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, + {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, + {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, + {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, + {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, + {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, + {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, + {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, + {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, + {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, + {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, + {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, + {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, + {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, + {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, + {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, + {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, + {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, + {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, + {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, + {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, + {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, + {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, + {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, + {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, + {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, + {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, + {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, + {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, + {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, + {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, + {65313, 14, 26}, + }; + static const unsigned short aiOff[] = { + 1, 2, 8, 15, 16, 26, 28, 32, + 37, 38, 40, 48, 63, 64, 69, 71, + 79, 80, 116, 202, 203, 205, 206, 207, + 209, 210, 211, 213, 214, 217, 218, 219, + 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, + 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, + 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, + 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, + 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, + 65514, 65521, 65527, 65528, 65529, + }; - /* Allocate space to hold the change in document sizes */ - aSzDel = sqlite3_malloc( sizeof(aSzDel[0])*(p->nColumn+1)*2 ); - if( aSzDel==0 ){ - rc = SQLITE_NOMEM; - goto update_out; - } - aSzIns = &aSzDel[p->nColumn+1]; - memset(aSzDel, 0, sizeof(aSzDel[0])*(p->nColumn+1)*2); + int ret = c; - rc = fts3Writelock(p); - if( rc!=SQLITE_OK ) goto update_out; + assert( c>=0 ); + assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); - /* If this is an INSERT operation, or an UPDATE that modifies the rowid - ** value, then this operation requires constraint handling. - ** - ** If the on-conflict mode is REPLACE, this means that the existing row - ** should be deleted from the database before inserting the new row. Or, - ** if the on-conflict mode is other than REPLACE, then this method must - ** detect the conflict and return SQLITE_CONSTRAINT before beginning to - ** modify the database file. - */ - if( nArg>1 && p->zContentTbl==0 ){ - /* Find the value object that holds the new rowid value. */ - sqlite3_value *pNewRowid = apVal[3+p->nColumn]; - if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){ - pNewRowid = apVal[1]; - } + if( c<128 ){ + if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); + }else if( c<65536 ){ + int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; + int iLo = 0; + int iRes = -1; - if( sqlite3_value_type(pNewRowid)!=SQLITE_NULL && ( - sqlite3_value_type(apVal[0])==SQLITE_NULL - || sqlite3_value_int64(apVal[0])!=sqlite3_value_int64(pNewRowid) - )){ - /* The new rowid is not NULL (in this case the rowid will be - ** automatically assigned and there is no chance of a conflict), and - ** the statement is either an INSERT or an UPDATE that modifies the - ** rowid column. So if the conflict mode is REPLACE, then delete any - ** existing row with rowid=pNewRowid. - ** - ** Or, if the conflict mode is not REPLACE, insert the new record into - ** the %_content table. If we hit the duplicate rowid constraint (or any - ** other error) while doing so, return immediately. - ** - ** This branch may also run if pNewRowid contains a value that cannot - ** be losslessly converted to an integer. In this case, the eventual - ** call to fts3InsertData() (either just below or further on in this - ** function) will return SQLITE_MISMATCH. If fts3DeleteByRowid is - ** invoked, it will delete zero rows (since no row will have - ** docid=$pNewRowid if $pNewRowid is not an integer value). - */ - if( sqlite3_vtab_on_conflict(p->db)==SQLITE_REPLACE ){ - rc = fts3DeleteByRowid(p, pNewRowid, &nChng, aSzDel); + while( iHi>=iLo ){ + int iTest = (iHi + iLo) / 2; + int cmp = (c - aEntry[iTest].iCode); + if( cmp>=0 ){ + iRes = iTest; + iLo = iTest+1; }else{ - rc = fts3InsertData(p, apVal, pRowid); - bInsertDone = 1; + iHi = iTest-1; } } - } - if( rc!=SQLITE_OK ){ - goto update_out; - } + assert( iRes<0 || c>=aEntry[iRes].iCode ); - /* If this is a DELETE or UPDATE operation, remove the old record. */ - if( sqlite3_value_type(apVal[0])!=SQLITE_NULL ){ - assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER ); - rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel); - isRemove = 1; - } - - /* If this is an INSERT or UPDATE operation, insert the new record. */ - if( nArg>1 && rc==SQLITE_OK ){ - int iLangid = sqlite3_value_int(apVal[2 + p->nColumn + 2]); - if( bInsertDone==0 ){ - rc = fts3InsertData(p, apVal, pRowid); - if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){ - rc = FTS_CORRUPT_VTAB; + if( iRes>=0 ){ + const struct TableEntry *p = &aEntry[iRes]; + if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ + ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; + assert( ret>0 ); } } - if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){ - rc = fts3PendingTermsDocid(p, iLangid, *pRowid); - } - if( rc==SQLITE_OK ){ - assert( p->iPrevDocid==*pRowid ); - rc = fts3InsertTerms(p, iLangid, apVal, aSzIns); - } - if( p->bHasDocsize ){ - fts3InsertDocsize(&rc, p, aSzIns); - } - nChng++; - } - if( p->bFts4 ){ - fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nChng); + if( bRemoveDiacritic ) ret = remove_diacritic(ret); } - - update_out: - sqlite3_free(aSzDel); - sqlite3Fts3SegmentsClose(p); - return rc; -} - -/* -** Flush any data in the pending-terms hash table to disk. If successful, -** merge all segments in the database (including the new segment, if -** there was any data to flush) into a single segment. -*/ -SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ - int rc; - rc = sqlite3_exec(p->db, "SAVEPOINT fts3", 0, 0, 0); - if( rc==SQLITE_OK ){ - rc = fts3DoOptimize(p, 1); - if( rc==SQLITE_OK || rc==SQLITE_DONE ){ - int rc2 = sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); - if( rc2!=SQLITE_OK ) rc = rc2; - }else{ - sqlite3_exec(p->db, "ROLLBACK TO fts3", 0, 0, 0); - sqlite3_exec(p->db, "RELEASE fts3", 0, 0, 0); - } + + else if( c>=66560 && c<66600 ){ + ret = c + 40; } - sqlite3Fts3SegmentsClose(p); - return rc; -} -#endif + return ret; +} +#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ +#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ -/************** End of fts3_write.c ******************************************/ -/************** Begin file fts3_snippet.c ************************************/ +/************** End of fts3_unicode2.c ***************************************/ +/************** Begin file rtree.c *******************************************/ /* -** 2009 Oct 23 +** 2001 September 15 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: @@ -148633,6585 +152601,8372 @@ SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){ ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** -****************************************************************************** +************************************************************************* +** This file contains code for implementations of the r-tree and r*-tree +** algorithms packaged as an SQLite virtual table module. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +/* +** Database Format of R-Tree Tables +** -------------------------------- +** +** The data structure for a single virtual r-tree table is stored in three +** native SQLite tables declared as follows. In each case, the '%' character +** in the table name is replaced with the user-supplied name of the r-tree +** table. +** +** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) +** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) +** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) +** +** The data for each node of the r-tree structure is stored in the %_node +** table. For each node that is not the root node of the r-tree, there is +** an entry in the %_parent table associating the node with its parent. +** And for each row of data in the table, there is an entry in the %_rowid +** table that maps from the entries rowid to the id of the node that it +** is stored on. +** +** The root node of an r-tree always exists, even if the r-tree table is +** empty. The nodeno of the root node is always 1. All other nodes in the +** table must be the same size as the root node. The content of each node +** is formatted as follows: +** +** 1. If the node is the root node (node 1), then the first 2 bytes +** of the node contain the tree depth as a big-endian integer. +** For non-root nodes, the first 2 bytes are left unused. +** +** 2. The next 2 bytes contain the number of entries currently +** stored in the node. +** +** 3. The remainder of the node contains the node entries. Each entry +** consists of a single 8-byte integer followed by an even number +** of 4-byte coordinates. For leaf nodes the integer is the rowid +** of a record. For internal nodes it is the node number of a +** child page. +*/ + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) + +#ifndef SQLITE_CORE +/* #include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 +#else +/* #include "sqlite3.h" */ +#endif /* #include <string.h> */ /* #include <assert.h> */ +/* #include <stdio.h> */ -/* -** Characters that may appear in the second argument to matchinfo(). -*/ -#define FTS3_MATCHINFO_NPHRASE 'p' /* 1 value */ -#define FTS3_MATCHINFO_NCOL 'c' /* 1 value */ -#define FTS3_MATCHINFO_NDOC 'n' /* 1 value */ -#define FTS3_MATCHINFO_AVGLENGTH 'a' /* nCol values */ -#define FTS3_MATCHINFO_LENGTH 'l' /* nCol values */ -#define FTS3_MATCHINFO_LCS 's' /* nCol values */ -#define FTS3_MATCHINFO_HITS 'x' /* 3*nCol*nPhrase values */ -#define FTS3_MATCHINFO_LHITS 'y' /* nCol*nPhrase values */ +#ifndef SQLITE_AMALGAMATION +#include "sqlite3rtree.h" +typedef sqlite3_int64 i64; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +#endif -/* -** The default value for the second argument to matchinfo(). +/* The following macro is used to suppress compiler warnings. */ -#define FTS3_MATCHINFO_DEFAULT "pcx" +#ifndef UNUSED_PARAMETER +# define UNUSED_PARAMETER(x) (void)(x) +#endif + +typedef struct Rtree Rtree; +typedef struct RtreeCursor RtreeCursor; +typedef struct RtreeNode RtreeNode; +typedef struct RtreeCell RtreeCell; +typedef struct RtreeConstraint RtreeConstraint; +typedef struct RtreeMatchArg RtreeMatchArg; +typedef struct RtreeGeomCallback RtreeGeomCallback; +typedef union RtreeCoord RtreeCoord; +typedef struct RtreeSearchPoint RtreeSearchPoint; +/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ +#define RTREE_MAX_DIMENSIONS 5 -/* -** Used as an fts3ExprIterate() context when loading phrase doclists to -** Fts3Expr.aDoclist[]/nDoclist. +/* Size of hash table Rtree.aHash. This hash table is not expected to +** ever contain very many entries, so a fixed number of buckets is +** used. */ -typedef struct LoadDoclistCtx LoadDoclistCtx; -struct LoadDoclistCtx { - Fts3Cursor *pCsr; /* FTS3 Cursor */ - int nPhrase; /* Number of phrases seen so far */ - int nToken; /* Number of tokens seen so far */ -}; +#define HASHSIZE 97 -/* -** The following types are used as part of the implementation of the -** fts3BestSnippet() routine. +/* The xBestIndex method of this virtual table requires an estimate of +** the number of rows in the virtual table to calculate the costs of +** various strategies. If possible, this estimate is loaded from the +** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). +** Otherwise, if no sqlite_stat1 entry is available, use +** RTREE_DEFAULT_ROWEST. */ -typedef struct SnippetIter SnippetIter; -typedef struct SnippetPhrase SnippetPhrase; -typedef struct SnippetFragment SnippetFragment; +#define RTREE_DEFAULT_ROWEST 1048576 +#define RTREE_MIN_ROWEST 100 -struct SnippetIter { - Fts3Cursor *pCsr; /* Cursor snippet is being generated from */ - int iCol; /* Extract snippet from this column */ - int nSnippet; /* Requested snippet length (in tokens) */ - int nPhrase; /* Number of phrases in query */ - SnippetPhrase *aPhrase; /* Array of size nPhrase */ - int iCurrent; /* First token of current snippet */ -}; +/* +** An rtree virtual-table object. +*/ +struct Rtree { + sqlite3_vtab base; /* Base class. Must be first */ + sqlite3 *db; /* Host database connection */ + int iNodeSize; /* Size in bytes of each node in the node table */ + u8 nDim; /* Number of dimensions */ + u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ + u8 nBytesPerCell; /* Bytes consumed per cell */ + int iDepth; /* Current depth of the r-tree structure */ + char *zDb; /* Name of database containing r-tree table */ + char *zName; /* Name of r-tree table */ + int nBusy; /* Current number of users of this structure */ + i64 nRowEst; /* Estimated number of rows in this table */ -struct SnippetPhrase { - int nToken; /* Number of tokens in phrase */ - char *pList; /* Pointer to start of phrase position list */ - int iHead; /* Next value in position list */ - char *pHead; /* Position list data following iHead */ - int iTail; /* Next value in trailing position list */ - char *pTail; /* Position list data following iTail */ -}; + /* List of nodes removed during a CondenseTree operation. List is + ** linked together via the pointer normally used for hash chains - + ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree + ** headed by the node (leaf nodes have RtreeNode.iNode==0). + */ + RtreeNode *pDeleted; + int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ -struct SnippetFragment { - int iCol; /* Column snippet is extracted from */ - int iPos; /* Index of first token in snippet */ - u64 covered; /* Mask of query phrases covered */ - u64 hlmask; /* Mask of snippet terms to highlight */ -}; + /* Statements to read/write/delete a record from xxx_node */ + sqlite3_stmt *pReadNode; + sqlite3_stmt *pWriteNode; + sqlite3_stmt *pDeleteNode; -/* -** This type is used as an fts3ExprIterate() context object while -** accumulating the data returned by the matchinfo() function. -*/ -typedef struct MatchInfo MatchInfo; -struct MatchInfo { - Fts3Cursor *pCursor; /* FTS3 Cursor */ - int nCol; /* Number of columns in table */ - int nPhrase; /* Number of matchable phrases in query */ - sqlite3_int64 nDoc; /* Number of docs in database */ - u32 *aMatchinfo; /* Pre-allocated buffer */ + /* Statements to read/write/delete a record from xxx_rowid */ + sqlite3_stmt *pReadRowid; + sqlite3_stmt *pWriteRowid; + sqlite3_stmt *pDeleteRowid; + + /* Statements to read/write/delete a record from xxx_parent */ + sqlite3_stmt *pReadParent; + sqlite3_stmt *pWriteParent; + sqlite3_stmt *pDeleteParent; + + RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ }; +/* Possible values for Rtree.eCoordType: */ +#define RTREE_COORD_REAL32 0 +#define RTREE_COORD_INT32 1 +/* +** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will +** only deal with integer coordinates. No floating point operations +** will be done. +*/ +#ifdef SQLITE_RTREE_INT_ONLY + typedef sqlite3_int64 RtreeDValue; /* High accuracy coordinate */ + typedef int RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0 +#else + typedef double RtreeDValue; /* High accuracy coordinate */ + typedef float RtreeValue; /* Low accuracy coordinate */ +# define RTREE_ZERO 0.0 +#endif /* -** The snippet() and offsets() functions both return text values. An instance -** of the following structure is used to accumulate those values while the -** functions are running. See fts3StringAppend() for details. +** When doing a search of an r-tree, instances of the following structure +** record intermediate results from the tree walk. +** +** The id is always a node-id. For iLevel>=1 the id is the node-id of +** the node that the RtreeSearchPoint represents. When iLevel==0, however, +** the id is of the parent node and the cell that RtreeSearchPoint +** represents is the iCell-th entry in the parent node. */ -typedef struct StrBuffer StrBuffer; -struct StrBuffer { - char *z; /* Pointer to buffer containing string */ - int n; /* Length of z in bytes (excl. nul-term) */ - int nAlloc; /* Allocated size of buffer z in bytes */ +struct RtreeSearchPoint { + RtreeDValue rScore; /* The score for this node. Smallest goes first. */ + sqlite3_int64 id; /* Node ID */ + u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ + u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ + u8 iCell; /* Cell index within the node */ }; - /* -** This function is used to help iterate through a position-list. A position -** list is a list of unique integers, sorted from smallest to largest. Each -** element of the list is represented by an FTS3 varint that takes the value -** of the difference between the current element and the previous one plus -** two. For example, to store the position-list: -** -** 4 9 113 -** -** the three varints: -** -** 6 7 106 +** The minimum number of cells allowed for a node is a third of the +** maximum. In Gutman's notation: ** -** are encoded. +** m = M/3 ** -** When this function is called, *pp points to the start of an element of -** the list. *piPos contains the value of the previous entry in the list. -** After it returns, *piPos contains the value of the next element of the -** list and *pp is advanced to the following varint. +** If an R*-tree "Reinsert" operation is required, the same number of +** cells are removed from the overfull node and reinserted into the tree. */ -static void fts3GetDeltaPosition(char **pp, int *piPos){ - int iVal; - *pp += fts3GetVarint32(*pp, &iVal); - *piPos += (iVal-2); -} +#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) +#define RTREE_REINSERT(p) RTREE_MINCELLS(p) +#define RTREE_MAXCELLS 51 /* -** Helper function for fts3ExprIterate() (see below). +** The smallest possible node-size is (512-64)==448 bytes. And the largest +** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). +** Therefore all non-root nodes must contain at least 3 entries. Since +** 2^40 is greater than 2^64, an r-tree structure always has a depth of +** 40 or less. */ -static int fts3ExprIterate2( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int *piPhrase, /* Pointer to phrase counter */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int rc; /* Return code */ - int eType = pExpr->eType; /* Type of expression node pExpr */ +#define RTREE_MAX_DEPTH 40 - if( eType!=FTSQUERY_PHRASE ){ - assert( pExpr->pLeft && pExpr->pRight ); - rc = fts3ExprIterate2(pExpr->pLeft, piPhrase, x, pCtx); - if( rc==SQLITE_OK && eType!=FTSQUERY_NOT ){ - rc = fts3ExprIterate2(pExpr->pRight, piPhrase, x, pCtx); - } - }else{ - rc = x(pExpr, *piPhrase, pCtx); - (*piPhrase)++; - } - return rc; -} /* -** Iterate through all phrase nodes in an FTS3 query, except those that -** are part of a sub-tree that is the right-hand-side of a NOT operator. -** For each phrase node found, the supplied callback function is invoked. -** -** If the callback function returns anything other than SQLITE_OK, -** the iteration is abandoned and the error code returned immediately. -** Otherwise, SQLITE_OK is returned after a callback has been made for -** all eligible phrase nodes. +** Number of entries in the cursor RtreeNode cache. The first entry is +** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining +** entries cache the RtreeNode for the first elements of the priority queue. */ -static int fts3ExprIterate( - Fts3Expr *pExpr, /* Expression to iterate phrases of */ - int (*x)(Fts3Expr*,int,void*), /* Callback function to invoke for phrases */ - void *pCtx /* Second argument to pass to callback */ -){ - int iPhrase = 0; /* Variable used as the phrase counter */ - return fts3ExprIterate2(pExpr, &iPhrase, x, pCtx); -} +#define RTREE_CACHE_SZ 5 -/* -** This is an fts3ExprIterate() callback used while loading the doclists -** for each phrase into Fts3Expr.aDoclist[]/nDoclist. See also -** fts3ExprLoadDoclists(). +/* +** An rtree cursor object. */ -static int fts3ExprLoadDoclistsCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - int rc = SQLITE_OK; - Fts3Phrase *pPhrase = pExpr->pPhrase; - LoadDoclistCtx *p = (LoadDoclistCtx *)ctx; - - UNUSED_PARAMETER(iPhrase); - - p->nPhrase++; - p->nToken += pPhrase->nToken; +struct RtreeCursor { + sqlite3_vtab_cursor base; /* Base class. Must be first */ + u8 atEOF; /* True if at end of search */ + u8 bPoint; /* True if sPoint is valid */ + int iStrategy; /* Copy of idxNum search parameter */ + int nConstraint; /* Number of entries in aConstraint */ + RtreeConstraint *aConstraint; /* Search constraints. */ + int nPointAlloc; /* Number of slots allocated for aPoint[] */ + int nPoint; /* Number of slots used in aPoint[] */ + int mxLevel; /* iLevel value for root of the tree */ + RtreeSearchPoint *aPoint; /* Priority queue for search points */ + RtreeSearchPoint sPoint; /* Cached next search point */ + RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ + u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ +}; - return rc; -} +/* Return the Rtree of a RtreeCursor */ +#define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) /* -** Load the doclists for each phrase in the query associated with FTS3 cursor -** pCsr. -** -** If pnPhrase is not NULL, then *pnPhrase is set to the number of matchable -** phrases in the expression (all phrases except those directly or -** indirectly descended from the right-hand-side of a NOT operator). If -** pnToken is not NULL, then it is set to the number of tokens in all -** matchable phrases of the expression. +** A coordinate can be either a floating point number or a integer. All +** coordinates within a single R-Tree are always of the same time. */ -static int fts3ExprLoadDoclists( - Fts3Cursor *pCsr, /* Fts3 cursor for current query */ - int *pnPhrase, /* OUT: Number of phrases in query */ - int *pnToken /* OUT: Number of tokens in query */ -){ - int rc; /* Return Code */ - LoadDoclistCtx sCtx = {0,0,0}; /* Context for fts3ExprIterate() */ - sCtx.pCsr = pCsr; - rc = fts3ExprIterate(pCsr->pExpr, fts3ExprLoadDoclistsCb, (void *)&sCtx); - if( pnPhrase ) *pnPhrase = sCtx.nPhrase; - if( pnToken ) *pnToken = sCtx.nToken; - return rc; -} - -static int fts3ExprPhraseCountCb(Fts3Expr *pExpr, int iPhrase, void *ctx){ - (*(int *)ctx)++; - UNUSED_PARAMETER(pExpr); - UNUSED_PARAMETER(iPhrase); - return SQLITE_OK; -} -static int fts3ExprPhraseCount(Fts3Expr *pExpr){ - int nPhrase = 0; - (void)fts3ExprIterate(pExpr, fts3ExprPhraseCountCb, (void *)&nPhrase); - return nPhrase; -} +union RtreeCoord { + RtreeValue f; /* Floating point value */ + int i; /* Integer value */ + u32 u; /* Unsigned for byte-order conversions */ +}; /* -** Advance the position list iterator specified by the first two -** arguments so that it points to the first element with a value greater -** than or equal to parameter iNext. +** The argument is an RtreeCoord. Return the value stored within the RtreeCoord +** formatted as a RtreeDValue (double or int64). This macro assumes that local +** variable pRtree points to the Rtree structure associated with the +** RtreeCoord. */ -static void fts3SnippetAdvance(char **ppIter, int *piIter, int iNext){ - char *pIter = *ppIter; - if( pIter ){ - int iIter = *piIter; - - while( iIter<iNext ){ - if( 0==(*pIter & 0xFE) ){ - iIter = -1; - pIter = 0; - break; - } - fts3GetDeltaPosition(&pIter, &iIter); - } - - *piIter = iIter; - *ppIter = pIter; - } -} +#ifdef SQLITE_RTREE_INT_ONLY +# define DCOORD(coord) ((RtreeDValue)coord.i) +#else +# define DCOORD(coord) ( \ + (pRtree->eCoordType==RTREE_COORD_REAL32) ? \ + ((double)coord.f) : \ + ((double)coord.i) \ + ) +#endif /* -** Advance the snippet iterator to the next candidate snippet. +** A search constraint. */ -static int fts3SnippetNextCandidate(SnippetIter *pIter){ - int i; /* Loop counter */ - - if( pIter->iCurrent<0 ){ - /* The SnippetIter object has just been initialized. The first snippet - ** candidate always starts at offset 0 (even if this candidate has a - ** score of 0.0). - */ - pIter->iCurrent = 0; - - /* Advance the 'head' iterator of each phrase to the first offset that - ** is greater than or equal to (iNext+nSnippet). - */ - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, pIter->nSnippet); - } - }else{ - int iStart; - int iEnd = 0x7FFFFFFF; - - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pHead && pPhrase->iHead<iEnd ){ - iEnd = pPhrase->iHead; - } - } - if( iEnd==0x7FFFFFFF ){ - return 1; - } +struct RtreeConstraint { + int iCoord; /* Index of constrained coordinate */ + int op; /* Constraining operation */ + union { + RtreeDValue rValue; /* Constraint value. */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + } u; + sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ +}; - pIter->iCurrent = iStart = iEnd - pIter->nSnippet + 1; - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - fts3SnippetAdvance(&pPhrase->pHead, &pPhrase->iHead, iEnd+1); - fts3SnippetAdvance(&pPhrase->pTail, &pPhrase->iTail, iStart); - } - } +/* Possible values for RtreeConstraint.op */ +#define RTREE_EQ 0x41 /* A */ +#define RTREE_LE 0x42 /* B */ +#define RTREE_LT 0x43 /* C */ +#define RTREE_GE 0x44 /* D */ +#define RTREE_GT 0x45 /* E */ +#define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ +#define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ - return 0; -} -/* -** Retrieve information about the current candidate snippet of snippet -** iterator pIter. +/* +** An rtree structure node. */ -static void fts3SnippetDetails( - SnippetIter *pIter, /* Snippet iterator */ - u64 mCovered, /* Bitmask of phrases already covered */ - int *piToken, /* OUT: First token of proposed snippet */ - int *piScore, /* OUT: "Score" for this snippet */ - u64 *pmCover, /* OUT: Bitmask of phrases covered */ - u64 *pmHighlight /* OUT: Bitmask of terms to highlight */ -){ - int iStart = pIter->iCurrent; /* First token of snippet */ - int iScore = 0; /* Score of this snippet */ - int i; /* Loop counter */ - u64 mCover = 0; /* Mask of phrases covered by this snippet */ - u64 mHighlight = 0; /* Mask of tokens to highlight in snippet */ - - for(i=0; i<pIter->nPhrase; i++){ - SnippetPhrase *pPhrase = &pIter->aPhrase[i]; - if( pPhrase->pTail ){ - char *pCsr = pPhrase->pTail; - int iCsr = pPhrase->iTail; - - while( iCsr<(iStart+pIter->nSnippet) ){ - int j; - u64 mPhrase = (u64)1 << i; - u64 mPos = (u64)1 << (iCsr - iStart); - assert( iCsr>=iStart ); - if( (mCover|mCovered)&mPhrase ){ - iScore++; - }else{ - iScore += 1000; - } - mCover |= mPhrase; - - for(j=0; j<pPhrase->nToken; j++){ - mHighlight |= (mPos>>j); - } - - if( 0==(*pCsr & 0x0FE) ) break; - fts3GetDeltaPosition(&pCsr, &iCsr); - } - } - } +struct RtreeNode { + RtreeNode *pParent; /* Parent node */ + i64 iNode; /* The node number */ + int nRef; /* Number of references to this node */ + int isDirty; /* True if the node needs to be written to disk */ + u8 *zData; /* Content of the node, as should be on disk */ + RtreeNode *pNext; /* Next node in this hash collision chain */ +}; - /* Set the output variables before returning. */ - *piToken = iStart; - *piScore = iScore; - *pmCover = mCover; - *pmHighlight = mHighlight; -} +/* Return the number of cells in a node */ +#define NCELL(pNode) readInt16(&(pNode)->zData[2]) -/* -** This function is an fts3ExprIterate() callback used by fts3BestSnippet(). -** Each invocation populates an element of the SnippetIter.aPhrase[] array. +/* +** A single cell from a node, deserialized */ -static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){ - SnippetIter *p = (SnippetIter *)ctx; - SnippetPhrase *pPhrase = &p->aPhrase[iPhrase]; - char *pCsr; - int rc; - - pPhrase->nToken = pExpr->pPhrase->nToken; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pCsr); - assert( rc==SQLITE_OK || pCsr==0 ); - if( pCsr ){ - int iFirst = 0; - pPhrase->pList = pCsr; - fts3GetDeltaPosition(&pCsr, &iFirst); - assert( iFirst>=0 ); - pPhrase->pHead = pCsr; - pPhrase->pTail = pCsr; - pPhrase->iHead = iFirst; - pPhrase->iTail = iFirst; - }else{ - assert( rc!=SQLITE_OK || ( - pPhrase->pList==0 && pPhrase->pHead==0 && pPhrase->pTail==0 - )); - } +struct RtreeCell { + i64 iRowid; /* Node or entry ID */ + RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ +}; - return rc; -} /* -** Select the fragment of text consisting of nFragment contiguous tokens -** from column iCol that represent the "best" snippet. The best snippet -** is the snippet with the highest score, where scores are calculated -** by adding: -** -** (a) +1 point for each occurrence of a matchable phrase in the snippet. -** -** (b) +1000 points for the first occurrence of each matchable phrase in -** the snippet for which the corresponding mCovered bit is not set. +** This object becomes the sqlite3_user_data() for the SQL functions +** that are created by sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() and which appear on the right of MATCH +** operators in order to constrain a search. ** -** The selected snippet parameters are stored in structure *pFragment before -** returning. The score of the selected snippet is stored in *piScore -** before returning. +** xGeom and xQueryFunc are the callback functions. Exactly one of +** xGeom and xQueryFunc fields is non-NULL, depending on whether the +** SQL function was created using sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback(). +** +** This object is deleted automatically by the destructor mechanism in +** sqlite3_create_function_v2(). */ -static int fts3BestSnippet( - int nSnippet, /* Desired snippet length */ - Fts3Cursor *pCsr, /* Cursor to create snippet for */ - int iCol, /* Index of column to create snippet from */ - u64 mCovered, /* Mask of phrases already covered */ - u64 *pmSeen, /* IN/OUT: Mask of phrases seen */ - SnippetFragment *pFragment, /* OUT: Best snippet found */ - int *piScore /* OUT: Score of snippet pFragment */ -){ - int rc; /* Return Code */ - int nList; /* Number of phrases in expression */ - SnippetIter sIter; /* Iterates through snippet candidates */ - int nByte; /* Number of bytes of space to allocate */ - int iBestScore = -1; /* Best snippet score found so far */ - int i; /* Loop counter */ - - memset(&sIter, 0, sizeof(sIter)); - - /* Iterate through the phrases in the expression to count them. The same - ** callback makes sure the doclists are loaded for each phrase. - */ - rc = fts3ExprLoadDoclists(pCsr, &nList, 0); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* Now that it is known how many phrases there are, allocate and zero - ** the required space using malloc(). - */ - nByte = sizeof(SnippetPhrase) * nList; - sIter.aPhrase = (SnippetPhrase *)sqlite3_malloc(nByte); - if( !sIter.aPhrase ){ - return SQLITE_NOMEM; - } - memset(sIter.aPhrase, 0, nByte); +struct RtreeGeomCallback { + int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); + int (*xQueryFunc)(sqlite3_rtree_query_info*); + void (*xDestructor)(void*); + void *pContext; +}; - /* Initialize the contents of the SnippetIter object. Then iterate through - ** the set of phrases in the expression to populate the aPhrase[] array. - */ - sIter.pCsr = pCsr; - sIter.iCol = iCol; - sIter.nSnippet = nSnippet; - sIter.nPhrase = nList; - sIter.iCurrent = -1; - rc = fts3ExprIterate(pCsr->pExpr, fts3SnippetFindPositions, (void *)&sIter); - if( rc==SQLITE_OK ){ - /* Set the *pmSeen output variable. */ - for(i=0; i<nList; i++){ - if( sIter.aPhrase[i].pHead ){ - *pmSeen |= (u64)1 << i; - } - } +/* +** Value for the first field of every RtreeMatchArg object. The MATCH +** operator tests that the first field of a blob operand matches this +** value to avoid operating on invalid blobs (which could cause a segfault). +*/ +#define RTREE_GEOMETRY_MAGIC 0x891245AB - /* Loop through all candidate snippets. Store the best snippet in - ** *pFragment. Store its associated 'score' in iBestScore. - */ - pFragment->iCol = iCol; - while( !fts3SnippetNextCandidate(&sIter) ){ - int iPos; - int iScore; - u64 mCover; - u64 mHighlite; - fts3SnippetDetails(&sIter, mCovered, &iPos, &iScore, &mCover,&mHighlite); - assert( iScore>=0 ); - if( iScore>iBestScore ){ - pFragment->iPos = iPos; - pFragment->hlmask = mHighlite; - pFragment->covered = mCover; - iBestScore = iScore; - } - } +/* +** An instance of this structure (in the form of a BLOB) is returned by +** the SQL functions that sqlite3_rtree_geometry_callback() and +** sqlite3_rtree_query_callback() create, and is read as the right-hand +** operand to the MATCH operator of an R-Tree. +*/ +struct RtreeMatchArg { + u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ + RtreeGeomCallback cb; /* Info about the callback functions */ + int nParam; /* Number of parameters to the SQL function */ + sqlite3_value **apSqlParam; /* Original SQL parameter values */ + RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ +}; - *piScore = iBestScore; - } - sqlite3_free(sIter.aPhrase); - return rc; -} +#ifndef MAX +# define MAX(x,y) ((x) < (y) ? (y) : (x)) +#endif +#ifndef MIN +# define MIN(x,y) ((x) > (y) ? (y) : (x)) +#endif +/* +** Functions to deserialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The deserialized value is returned. +*/ +static int readInt16(u8 *p){ + return (p[0]<<8) + p[1]; +} +static void readCoord(u8 *p, RtreeCoord *pCoord){ + pCoord->u = ( + (((u32)p[0]) << 24) + + (((u32)p[1]) << 16) + + (((u32)p[2]) << 8) + + (((u32)p[3]) << 0) + ); +} +static i64 readInt64(u8 *p){ + return ( + (((i64)p[0]) << 56) + + (((i64)p[1]) << 48) + + (((i64)p[2]) << 40) + + (((i64)p[3]) << 32) + + (((i64)p[4]) << 24) + + (((i64)p[5]) << 16) + + (((i64)p[6]) << 8) + + (((i64)p[7]) << 0) + ); +} /* -** Append a string to the string-buffer passed as the first argument. -** -** If nAppend is negative, then the length of the string zAppend is -** determined using strlen(). +** Functions to serialize a 16 bit integer, 32 bit real number and +** 64 bit integer. The value returned is the number of bytes written +** to the argument buffer (always 2, 4 and 8 respectively). */ -static int fts3StringAppend( - StrBuffer *pStr, /* Buffer to append to */ - const char *zAppend, /* Pointer to data to append to buffer */ - int nAppend /* Size of zAppend in bytes (or -1) */ -){ - if( nAppend<0 ){ - nAppend = (int)strlen(zAppend); - } +static int writeInt16(u8 *p, int i){ + p[0] = (i>> 8)&0xFF; + p[1] = (i>> 0)&0xFF; + return 2; +} +static int writeCoord(u8 *p, RtreeCoord *pCoord){ + u32 i; + assert( sizeof(RtreeCoord)==4 ); + assert( sizeof(u32)==4 ); + i = pCoord->u; + p[0] = (i>>24)&0xFF; + p[1] = (i>>16)&0xFF; + p[2] = (i>> 8)&0xFF; + p[3] = (i>> 0)&0xFF; + return 4; +} +static int writeInt64(u8 *p, i64 i){ + p[0] = (i>>56)&0xFF; + p[1] = (i>>48)&0xFF; + p[2] = (i>>40)&0xFF; + p[3] = (i>>32)&0xFF; + p[4] = (i>>24)&0xFF; + p[5] = (i>>16)&0xFF; + p[6] = (i>> 8)&0xFF; + p[7] = (i>> 0)&0xFF; + return 8; +} - /* If there is insufficient space allocated at StrBuffer.z, use realloc() - ** to grow the buffer until so that it is big enough to accomadate the - ** appended data. - */ - if( pStr->n+nAppend+1>=pStr->nAlloc ){ - int nAlloc = pStr->nAlloc+nAppend+100; - char *zNew = sqlite3_realloc(pStr->z, nAlloc); - if( !zNew ){ - return SQLITE_NOMEM; - } - pStr->z = zNew; - pStr->nAlloc = nAlloc; +/* +** Increment the reference count of node p. +*/ +static void nodeReference(RtreeNode *p){ + if( p ){ + p->nRef++; } - assert( pStr->z!=0 && (pStr->nAlloc >= pStr->n+nAppend+1) ); - - /* Append the data to the string buffer. */ - memcpy(&pStr->z[pStr->n], zAppend, nAppend); - pStr->n += nAppend; - pStr->z[pStr->n] = '\0'; - - return SQLITE_OK; } /* -** The fts3BestSnippet() function often selects snippets that end with a -** query term. That is, the final term of the snippet is always a term -** that requires highlighting. For example, if 'X' is a highlighted term -** and '.' is a non-highlighted term, BestSnippet() may select: -** -** ........X.....X -** -** This function "shifts" the beginning of the snippet forward in the -** document so that there are approximately the same number of -** non-highlighted terms to the right of the final highlighted term as there -** are to the left of the first highlighted term. For example, to this: -** -** ....X.....X.... -** -** This is done as part of extracting the snippet text, not when selecting -** the snippet. Snippet selection is done based on doclists only, so there -** is no way for fts3BestSnippet() to know whether or not the document -** actually contains terms that follow the final highlighted term. +** Clear the content of node p (set all bytes to 0x00). */ -static int fts3SnippetShift( - Fts3Table *pTab, /* FTS3 table snippet comes from */ - int iLangid, /* Language id to use in tokenizing */ - int nSnippet, /* Number of tokens desired for snippet */ - const char *zDoc, /* Document text to extract snippet from */ - int nDoc, /* Size of buffer zDoc in bytes */ - int *piPos, /* IN/OUT: First token of snippet */ - u64 *pHlmask /* IN/OUT: Mask of tokens to highlight */ -){ - u64 hlmask = *pHlmask; /* Local copy of initial highlight-mask */ +static void nodeZero(Rtree *pRtree, RtreeNode *p){ + memset(&p->zData[2], 0, pRtree->iNodeSize-2); + p->isDirty = 1; +} - if( hlmask ){ - int nLeft; /* Tokens to the left of first highlight */ - int nRight; /* Tokens to the right of last highlight */ - int nDesired; /* Ideal number of tokens to shift forward */ +/* +** Given a node number iNode, return the corresponding key to use +** in the Rtree.aHash table. +*/ +static int nodeHash(i64 iNode){ + return iNode % HASHSIZE; +} - for(nLeft=0; !(hlmask & ((u64)1 << nLeft)); nLeft++); - for(nRight=0; !(hlmask & ((u64)1 << (nSnippet-1-nRight))); nRight++); - nDesired = (nLeft-nRight)/2; +/* +** Search the node hash table for node iNode. If found, return a pointer +** to it. Otherwise, return 0. +*/ +static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ + RtreeNode *p; + for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); + return p; +} - /* Ideally, the start of the snippet should be pushed forward in the - ** document nDesired tokens. This block checks if there are actually - ** nDesired tokens to the right of the snippet. If so, *piPos and - ** *pHlMask are updated to shift the snippet nDesired tokens to the - ** right. Otherwise, the snippet is shifted by the number of tokens - ** available. - */ - if( nDesired>0 ){ - int nShift; /* Number of tokens to shift snippet by */ - int iCurrent = 0; /* Token counter */ - int rc; /* Return Code */ - sqlite3_tokenizer_module *pMod; - sqlite3_tokenizer_cursor *pC; - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; +/* +** Add node pNode to the node hash table. +*/ +static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ + int iHash; + assert( pNode->pNext==0 ); + iHash = nodeHash(pNode->iNode); + pNode->pNext = pRtree->aHash[iHash]; + pRtree->aHash[iHash] = pNode; +} - /* Open a cursor on zDoc/nDoc. Check if there are (nSnippet+nDesired) - ** or more tokens in zDoc/nDoc. - */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, iLangid, zDoc, nDoc, &pC); - if( rc!=SQLITE_OK ){ - return rc; - } - while( rc==SQLITE_OK && iCurrent<(nSnippet+nDesired) ){ - const char *ZDUMMY; int DUMMY1 = 0, DUMMY2 = 0, DUMMY3 = 0; - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &DUMMY2, &DUMMY3, &iCurrent); - } - pMod->xClose(pC); - if( rc!=SQLITE_OK && rc!=SQLITE_DONE ){ return rc; } +/* +** Remove node pNode from the node hash table. +*/ +static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode **pp; + if( pNode->iNode!=0 ){ + pp = &pRtree->aHash[nodeHash(pNode->iNode)]; + for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } + *pp = pNode->pNext; + pNode->pNext = 0; + } +} - nShift = (rc==SQLITE_DONE)+iCurrent-nSnippet; - assert( nShift<=nDesired ); - if( nShift>0 ){ - *piPos += nShift; - *pHlmask = hlmask >> nShift; - } - } +/* +** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), +** indicating that node has not yet been assigned a node number. It is +** assigned a node number when nodeWrite() is called to write the +** node contents out to the database. +*/ +static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ + RtreeNode *pNode; + pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); + if( pNode ){ + memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pNode->pParent = pParent; + pNode->isDirty = 1; + nodeReference(pParent); } - return SQLITE_OK; + return pNode; } /* -** Extract the snippet text for fragment pFragment from cursor pCsr and -** append it to string buffer pOut. +** Obtain a reference to an r-tree node. */ -static int fts3SnippetText( - Fts3Cursor *pCsr, /* FTS3 Cursor */ - SnippetFragment *pFragment, /* Snippet to extract */ - int iFragment, /* Fragment number */ - int isLast, /* True for final fragment in snippet */ - int nSnippet, /* Number of tokens in extracted snippet */ - const char *zOpen, /* String inserted before highlighted term */ - const char *zClose, /* String inserted after highlighted term */ - const char *zEllipsis, /* String inserted between snippets */ - StrBuffer *pOut /* Write output here */ +static int nodeAcquire( + Rtree *pRtree, /* R-tree structure */ + i64 iNode, /* Node number to load */ + RtreeNode *pParent, /* Either the parent node or NULL */ + RtreeNode **ppNode /* OUT: Acquired node */ ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc; /* Return code */ - const char *zDoc; /* Document text to extract snippet from */ - int nDoc; /* Size of zDoc in bytes */ - int iCurrent = 0; /* Current token number of document */ - int iEnd = 0; /* Byte offset of end of current token */ - int isShiftDone = 0; /* True after snippet is shifted */ - int iPos = pFragment->iPos; /* First token of snippet */ - u64 hlmask = pFragment->hlmask; /* Highlight-mask for snippet */ - int iCol = pFragment->iCol+1; /* Query column to extract text from */ - sqlite3_tokenizer_module *pMod; /* Tokenizer module methods object */ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor open on zDoc/nDoc */ - - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol)!=SQLITE_NULL ){ - return SQLITE_NOMEM; + int rc; + int rc2 = SQLITE_OK; + RtreeNode *pNode; + + /* Check if the requested node is already in the hash table. If so, + ** increase its reference count and return it. + */ + if( (pNode = nodeHashLookup(pRtree, iNode)) ){ + assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); + if( pParent && !pNode->pParent ){ + nodeReference(pParent); + pNode->pParent = pParent; } + pNode->nRef++; + *ppNode = pNode; return SQLITE_OK; } - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol); - - /* Open a token cursor on the document. */ - pMod = (sqlite3_tokenizer_module *)pTab->pTokenizer->pModule; - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, zDoc,nDoc,&pC); - if( rc!=SQLITE_OK ){ - return rc; - } - - while( rc==SQLITE_OK ){ - const char *ZDUMMY; /* Dummy argument used with tokenizer */ - int DUMMY1 = -1; /* Dummy argument used with tokenizer */ - int iBegin = 0; /* Offset in zDoc of start of token */ - int iFin = 0; /* Offset in zDoc of end of token */ - int isHighlight = 0; /* True for highlighted terms */ - /* Variable DUMMY1 is initialized to a negative value above. Elsewhere - ** in the FTS code the variable that the third argument to xNext points to - ** is initialized to zero before the first (*but not necessarily - ** subsequent*) call to xNext(). This is done for a particular application - ** that needs to know whether or not the tokenizer is being used for - ** snippet generation or for some other purpose. - ** - ** Extreme care is required when writing code to depend on this - ** initialization. It is not a documented part of the tokenizer interface. - ** If a tokenizer is used directly by any code outside of FTS, this - ** convention might not be respected. */ - rc = pMod->xNext(pC, &ZDUMMY, &DUMMY1, &iBegin, &iFin, &iCurrent); - if( rc!=SQLITE_OK ){ - if( rc==SQLITE_DONE ){ - /* Special case - the last token of the snippet is also the last token - ** of the column. Append any punctuation that occurred between the end - ** of the previous token and the end of the document to the output. - ** Then break out of the loop. */ - rc = fts3StringAppend(pOut, &zDoc[iEnd], -1); + sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); + rc = sqlite3_step(pRtree->pReadNode); + if( rc==SQLITE_ROW ){ + const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); + if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ + pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); + if( !pNode ){ + rc2 = SQLITE_NOMEM; + }else{ + pNode->pParent = pParent; + pNode->zData = (u8 *)&pNode[1]; + pNode->nRef = 1; + pNode->iNode = iNode; + pNode->isDirty = 0; + pNode->pNext = 0; + memcpy(pNode->zData, zBlob, pRtree->iNodeSize); + nodeReference(pParent); } - break; } - if( iCurrent<iPos ){ continue; } - - if( !isShiftDone ){ - int n = nDoc - iBegin; - rc = fts3SnippetShift( - pTab, pCsr->iLangid, nSnippet, &zDoc[iBegin], n, &iPos, &hlmask - ); - isShiftDone = 1; + } + rc = sqlite3_reset(pRtree->pReadNode); + if( rc==SQLITE_OK ) rc = rc2; - /* Now that the shift has been done, check if the initial "..." are - ** required. They are required if (a) this is not the first fragment, - ** or (b) this fragment does not begin at position 0 of its column. - */ - if( rc==SQLITE_OK ){ - if( iPos>0 || iFragment>0 ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - }else if( iBegin ){ - rc = fts3StringAppend(pOut, zDoc, iBegin); - } - } - if( rc!=SQLITE_OK || iCurrent<iPos ) continue; + /* If the root node was just loaded, set pRtree->iDepth to the height + ** of the r-tree structure. A height of zero means all data is stored on + ** the root node. A height of one means the children of the root node + ** are the leaves, and so on. If the depth as specified on the root node + ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. + */ + if( pNode && iNode==1 ){ + pRtree->iDepth = readInt16(pNode->zData); + if( pRtree->iDepth>RTREE_MAX_DEPTH ){ + rc = SQLITE_CORRUPT_VTAB; } + } - if( iCurrent>=(iPos+nSnippet) ){ - if( isLast ){ - rc = fts3StringAppend(pOut, zEllipsis, -1); - } - break; + /* If no error has occurred so far, check if the "number of entries" + ** field on the node is too large. If so, set the return code to + ** SQLITE_CORRUPT_VTAB. + */ + if( pNode && rc==SQLITE_OK ){ + if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ + rc = SQLITE_CORRUPT_VTAB; } + } - /* Set isHighlight to true if this term should be highlighted. */ - isHighlight = (hlmask & ((u64)1 << (iCurrent-iPos)))!=0; - - if( iCurrent>iPos ) rc = fts3StringAppend(pOut, &zDoc[iEnd], iBegin-iEnd); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zOpen, -1); - if( rc==SQLITE_OK ) rc = fts3StringAppend(pOut, &zDoc[iBegin], iFin-iBegin); - if( rc==SQLITE_OK && isHighlight ) rc = fts3StringAppend(pOut, zClose, -1); - - iEnd = iFin; + if( rc==SQLITE_OK ){ + if( pNode!=0 ){ + nodeHashInsert(pRtree, pNode); + }else{ + rc = SQLITE_CORRUPT_VTAB; + } + *ppNode = pNode; + }else{ + sqlite3_free(pNode); + *ppNode = 0; } - pMod->xClose(pC); return rc; } - /* -** This function is used to count the entries in a column-list (a -** delta-encoded list of term offsets within a single column of a single -** row). When this function is called, *ppCollist should point to the -** beginning of the first varint in the column-list (the varint that -** contains the position of the first matching term in the column data). -** Before returning, *ppCollist is set to point to the first byte after -** the last varint in the column-list (either the 0x00 signifying the end -** of the position-list, or the 0x01 that precedes the column number of -** the next column in the position-list). -** -** The number of elements in the column-list is returned. +** Overwrite cell iCell of node pNode with the contents of pCell. */ -static int fts3ColumnlistCount(char **ppCollist){ - char *pEnd = *ppCollist; - char c = 0; - int nEntry = 0; - - /* A column-list is terminated by either a 0x01 or 0x00. */ - while( 0xFE & (*pEnd | c) ){ - c = *pEnd++ & 0x80; - if( !c ) nEntry++; +static void nodeOverwriteCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node into which the cell is to be written */ + RtreeCell *pCell, /* The cell to write */ + int iCell /* Index into pNode into which pCell is written */ +){ + int ii; + u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + p += writeInt64(p, pCell->iRowid); + for(ii=0; ii<(pRtree->nDim*2); ii++){ + p += writeCoord(p, &pCell->aCoord[ii]); } + pNode->isDirty = 1; +} - *ppCollist = pEnd; - return nEntry; +/* +** Remove the cell with index iCell from node pNode. +*/ +static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ + u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; + u8 *pSrc = &pDst[pRtree->nBytesPerCell]; + int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; + memmove(pDst, pSrc, nByte); + writeInt16(&pNode->zData[2], NCELL(pNode)-1); + pNode->isDirty = 1; } /* -** fts3ExprIterate() callback used to collect the "global" matchinfo stats -** for a single query. -** -** fts3ExprIterate() callback to load the 'global' elements of a -** FTS3_MATCHINFO_HITS matchinfo array. The global stats are those elements -** of the matchinfo array that are constant for all rows returned by the -** current query. -** -** Argument pCtx is actually a pointer to a struct of type MatchInfo. This -** function populates Matchinfo.aMatchinfo[] as follows: -** -** for(iCol=0; iCol<nCol; iCol++){ -** aMatchinfo[3*iPhrase*nCol + 3*iCol + 1] = X; -** aMatchinfo[3*iPhrase*nCol + 3*iCol + 2] = Y; -** } -** -** where X is the number of matches for phrase iPhrase is column iCol of all -** rows of the table. Y is the number of rows for which column iCol contains -** at least one instance of phrase iPhrase. +** Insert the contents of cell pCell into node pNode. If the insert +** is successful, return SQLITE_OK. ** -** If the phrase pExpr consists entirely of deferred tokens, then all X and -** Y values are set to nDoc, where nDoc is the number of documents in the -** file system. This is done because the full-text index doclist is required -** to calculate these values properly, and the full-text index doclist is -** not available for deferred tokens. +** If there is not enough free space in pNode, return SQLITE_FULL. */ -static int fts3ExprGlobalHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number (numbered from zero) */ - void *pCtx /* Pointer to MatchInfo structure */ +static int nodeInsertCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* Write new cell into this node */ + RtreeCell *pCell /* The cell to be inserted */ ){ - MatchInfo *p = (MatchInfo *)pCtx; - return sqlite3Fts3EvalPhraseStats( - p->pCursor, pExpr, &p->aMatchinfo[3*iPhrase*p->nCol] - ); + int nCell; /* Current number of cells in pNode */ + int nMaxCell; /* Maximum number of cells for pNode */ + + nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; + nCell = NCELL(pNode); + + assert( nCell<=nMaxCell ); + if( nCell<nMaxCell ){ + nodeOverwriteCell(pRtree, pNode, pCell, nCell); + writeInt16(&pNode->zData[2], nCell+1); + pNode->isDirty = 1; + } + + return (nCell==nMaxCell); } /* -** fts3ExprIterate() callback used to collect the "local" part of the -** FTS3_MATCHINFO_HITS array. The local stats are those elements of the -** array that are different for each row returned by the query. +** If the node is dirty, write it out to the database. */ -static int fts3ExprLocalHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number */ - void *pCtx /* Pointer to MatchInfo structure */ -){ +static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ int rc = SQLITE_OK; - MatchInfo *p = (MatchInfo *)pCtx; - int iStart = iPhrase * p->nCol * 3; - int i; - - for(i=0; i<p->nCol && rc==SQLITE_OK; i++){ - char *pCsr; - rc = sqlite3Fts3EvalPhrasePoslist(p->pCursor, pExpr, i, &pCsr); - if( pCsr ){ - p->aMatchinfo[iStart+i*3] = fts3ColumnlistCount(&pCsr); + if( pNode->isDirty ){ + sqlite3_stmt *p = pRtree->pWriteNode; + if( pNode->iNode ){ + sqlite3_bind_int64(p, 1, pNode->iNode); }else{ - p->aMatchinfo[iStart+i*3] = 0; + sqlite3_bind_null(p, 1); + } + sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); + sqlite3_step(p); + pNode->isDirty = 0; + rc = sqlite3_reset(p); + if( pNode->iNode==0 && rc==SQLITE_OK ){ + pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); + nodeHashInsert(pRtree, pNode); } } - return rc; } /* -** fts3ExprIterate() callback used to gather information for the matchinfo -** directive 'y'. +** Release a reference to a node. If the node is dirty and the reference +** count drops to zero, the node data is written to the database. */ -static int fts3ExprLHitsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - MatchInfo *p = (MatchInfo *)pCtx; - Fts3Table *pTab = (Fts3Table *)p->pCursor->base.pVtab; +static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ int rc = SQLITE_OK; - int iStart = iPhrase * p->nCol; - Fts3Expr *pEof; /* Ancestor node already at EOF */ - - /* This must be a phrase */ - assert( pExpr->pPhrase ); - - /* Initialize all output integers to zero. */ - memset(&p->aMatchinfo[iStart], 0, sizeof(u32) * p->nCol); - - /* Check if this or any parent node is at EOF. If so, then all output - ** values are zero. */ - for(pEof=pExpr; pEof && pEof->bEof==0; pEof=pEof->pParent); - - if( pEof==0 && pExpr->iDocid==p->pCursor->iPrevId ){ - Fts3Phrase *pPhrase = pExpr->pPhrase; - char *pIter = pPhrase->doclist.pList; - int iCol = 0; - - while( 1 ){ - int nHit = fts3ColumnlistCount(&pIter); - if( (pPhrase->iColumn>=pTab->nColumn || pPhrase->iColumn==iCol) ){ - p->aMatchinfo[iStart + iCol] = (u32)nHit; + if( pNode ){ + assert( pNode->nRef>0 ); + pNode->nRef--; + if( pNode->nRef==0 ){ + if( pNode->iNode==1 ){ + pRtree->iDepth = -1; } - assert( *pIter==0x00 || *pIter==0x01 ); - if( *pIter!=0x01 ) break; - pIter++; - pIter += fts3GetVarint32(pIter, &iCol); + if( pNode->pParent ){ + rc = nodeRelease(pRtree, pNode->pParent); + } + if( rc==SQLITE_OK ){ + rc = nodeWrite(pRtree, pNode); + } + nodeHashDelete(pRtree, pNode); + sqlite3_free(pNode); } } - return rc; } -static int fts3MatchinfoCheck( - Fts3Table *pTab, - char cArg, - char **pzErr +/* +** Return the 64-bit integer value associated with cell iCell of +** node pNode. If pNode is a leaf node, this is a rowid. If it is +** an internal node, then the 64-bit integer is a child page number. +*/ +static i64 nodeGetRowid( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract the ID */ + int iCell /* The cell index from which to extract the ID */ ){ - if( (cArg==FTS3_MATCHINFO_NPHRASE) - || (cArg==FTS3_MATCHINFO_NCOL) - || (cArg==FTS3_MATCHINFO_NDOC && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_AVGLENGTH && pTab->bFts4) - || (cArg==FTS3_MATCHINFO_LENGTH && pTab->bHasDocsize) - || (cArg==FTS3_MATCHINFO_LCS) - || (cArg==FTS3_MATCHINFO_HITS) - || (cArg==FTS3_MATCHINFO_LHITS) - ){ - return SQLITE_OK; - } - sqlite3Fts3ErrMsg(pzErr, "unrecognized matchinfo request: %c", cArg); - return SQLITE_ERROR; + assert( iCell<NCELL(pNode) ); + return readInt64(&pNode->zData[4 + pRtree->nBytesPerCell*iCell]); } -static int fts3MatchinfoSize(MatchInfo *pInfo, char cArg){ - int nVal; /* Number of integers output by cArg */ - - switch( cArg ){ - case FTS3_MATCHINFO_NDOC: - case FTS3_MATCHINFO_NPHRASE: - case FTS3_MATCHINFO_NCOL: - nVal = 1; - break; +/* +** Return coordinate iCoord from cell iCell in node pNode. +*/ +static void nodeGetCoord( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node from which to extract a coordinate */ + int iCell, /* The index of the cell within the node */ + int iCoord, /* Which coordinate to extract */ + RtreeCoord *pCoord /* OUT: Space to write result to */ +){ + readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); +} - case FTS3_MATCHINFO_AVGLENGTH: - case FTS3_MATCHINFO_LENGTH: - case FTS3_MATCHINFO_LCS: - nVal = pInfo->nCol; - break; +/* +** Deserialize cell iCell of node pNode. Populate the structure pointed +** to by pCell with the results. +*/ +static void nodeGetCell( + Rtree *pRtree, /* The overall R-Tree */ + RtreeNode *pNode, /* The node containing the cell to be read */ + int iCell, /* Index of the cell within the node */ + RtreeCell *pCell /* OUT: Write the cell contents here */ +){ + u8 *pData; + RtreeCoord *pCoord; + int ii; + pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); + pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); + pCoord = pCell->aCoord; + for(ii=0; ii<pRtree->nDim*2; ii++){ + readCoord(&pData[ii*4], &pCoord[ii]); + } +} - case FTS3_MATCHINFO_LHITS: - nVal = pInfo->nCol * pInfo->nPhrase; - break; - default: - assert( cArg==FTS3_MATCHINFO_HITS ); - nVal = pInfo->nCol * pInfo->nPhrase * 3; - break; - } +/* Forward declaration for the function that does the work of +** the virtual table module xCreate() and xConnect() methods. +*/ +static int rtreeInit( + sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int +); - return nVal; +/* +** Rtree virtual table module xCreate method. +*/ +static int rtreeCreate( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr +){ + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); } -static int fts3MatchinfoSelectDoctotal( - Fts3Table *pTab, - sqlite3_stmt **ppStmt, - sqlite3_int64 *pnDoc, - const char **paLen +/* +** Rtree virtual table module xConnect method. +*/ +static int rtreeConnect( + sqlite3 *db, + void *pAux, + int argc, const char *const*argv, + sqlite3_vtab **ppVtab, + char **pzErr ){ - sqlite3_stmt *pStmt; - const char *a; - sqlite3_int64 nDoc; - - if( !*ppStmt ){ - int rc = sqlite3Fts3SelectDoctotal(pTab, ppStmt); - if( rc!=SQLITE_OK ) return rc; - } - pStmt = *ppStmt; - assert( sqlite3_data_count(pStmt)==1 ); - - a = sqlite3_column_blob(pStmt, 0); - a += sqlite3Fts3GetVarint(a, &nDoc); - if( nDoc==0 ) return FTS_CORRUPT_VTAB; - *pnDoc = (u32)nDoc; + return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); +} - if( paLen ) *paLen = a; - return SQLITE_OK; +/* +** Increment the r-tree reference count. +*/ +static void rtreeReference(Rtree *pRtree){ + pRtree->nBusy++; } /* -** An instance of the following structure is used to store state while -** iterating through a multi-column position-list corresponding to the -** hits for a single phrase on a single row in order to calculate the -** values for a matchinfo() FTS3_MATCHINFO_LCS request. +** Decrement the r-tree reference count. When the reference count reaches +** zero the structure is deleted. */ -typedef struct LcsIterator LcsIterator; -struct LcsIterator { - Fts3Expr *pExpr; /* Pointer to phrase expression */ - int iPosOffset; /* Tokens count up to end of this phrase */ - char *pRead; /* Cursor used to iterate through aDoclist */ - int iPos; /* Current position */ -}; +static void rtreeRelease(Rtree *pRtree){ + pRtree->nBusy--; + if( pRtree->nBusy==0 ){ + sqlite3_finalize(pRtree->pReadNode); + sqlite3_finalize(pRtree->pWriteNode); + sqlite3_finalize(pRtree->pDeleteNode); + sqlite3_finalize(pRtree->pReadRowid); + sqlite3_finalize(pRtree->pWriteRowid); + sqlite3_finalize(pRtree->pDeleteRowid); + sqlite3_finalize(pRtree->pReadParent); + sqlite3_finalize(pRtree->pWriteParent); + sqlite3_finalize(pRtree->pDeleteParent); + sqlite3_free(pRtree); + } +} /* -** If LcsIterator.iCol is set to the following value, the iterator has -** finished iterating through all offsets for all columns. +** Rtree virtual table module xDisconnect method. */ -#define LCS_ITERATOR_FINISHED 0x7FFFFFFF; - -static int fts3MatchinfoLcsCb( - Fts3Expr *pExpr, /* Phrase expression node */ - int iPhrase, /* Phrase number (numbered from zero) */ - void *pCtx /* Pointer to MatchInfo structure */ -){ - LcsIterator *aIter = (LcsIterator *)pCtx; - aIter[iPhrase].pExpr = pExpr; +static int rtreeDisconnect(sqlite3_vtab *pVtab){ + rtreeRelease((Rtree *)pVtab); return SQLITE_OK; } -/* -** Advance the iterator passed as an argument to the next position. Return -** 1 if the iterator is at EOF or if it now points to the start of the -** position list for the next column. +/* +** Rtree virtual table module xDestroy method. */ -static int fts3LcsIteratorAdvance(LcsIterator *pIter){ - char *pRead = pIter->pRead; - sqlite3_int64 iRead; - int rc = 0; - - pRead += sqlite3Fts3GetVarint(pRead, &iRead); - if( iRead==0 || iRead==1 ){ - pRead = 0; - rc = 1; +static int rtreeDestroy(sqlite3_vtab *pVtab){ + Rtree *pRtree = (Rtree *)pVtab; + int rc; + char *zCreate = sqlite3_mprintf( + "DROP TABLE '%q'.'%q_node';" + "DROP TABLE '%q'.'%q_rowid';" + "DROP TABLE '%q'.'%q_parent';", + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName, + pRtree->zDb, pRtree->zName + ); + if( !zCreate ){ + rc = SQLITE_NOMEM; }else{ - pIter->iPos += (int)(iRead-2); + rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + } + if( rc==SQLITE_OK ){ + rtreeRelease(pRtree); } - pIter->pRead = pRead; return rc; } - -/* -** This function implements the FTS3_MATCHINFO_LCS matchinfo() flag. -** -** If the call is successful, the longest-common-substring lengths for each -** column are written into the first nCol elements of the pInfo->aMatchinfo[] -** array before returning. SQLITE_OK is returned in this case. -** -** Otherwise, if an error occurs, an SQLite error code is returned and the -** data written to the first nCol elements of pInfo->aMatchinfo[] is -** undefined. -*/ -static int fts3MatchinfoLcs(Fts3Cursor *pCsr, MatchInfo *pInfo){ - LcsIterator *aIter; - int i; - int iCol; - int nToken = 0; - /* Allocate and populate the array of LcsIterator objects. The array - ** contains one element for each matchable phrase in the query. - **/ - aIter = sqlite3_malloc(sizeof(LcsIterator) * pCsr->nPhrase); - if( !aIter ) return SQLITE_NOMEM; - memset(aIter, 0, sizeof(LcsIterator) * pCsr->nPhrase); - (void)fts3ExprIterate(pCsr->pExpr, fts3MatchinfoLcsCb, (void*)aIter); +/* +** Rtree virtual table module xOpen method. +*/ +static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ + int rc = SQLITE_NOMEM; + RtreeCursor *pCsr; - for(i=0; i<pInfo->nPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - nToken -= pIter->pExpr->pPhrase->nToken; - pIter->iPosOffset = nToken; + pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); + if( pCsr ){ + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = pVTab; + rc = SQLITE_OK; } + *ppCursor = (sqlite3_vtab_cursor *)pCsr; - for(iCol=0; iCol<pInfo->nCol; iCol++){ - int nLcs = 0; /* LCS value for this column */ - int nLive = 0; /* Number of iterators in aIter not at EOF */ - - for(i=0; i<pInfo->nPhrase; i++){ - int rc; - LcsIterator *pIt = &aIter[i]; - rc = sqlite3Fts3EvalPhrasePoslist(pCsr, pIt->pExpr, iCol, &pIt->pRead); - if( rc!=SQLITE_OK ) return rc; - if( pIt->pRead ){ - pIt->iPos = pIt->iPosOffset; - fts3LcsIteratorAdvance(&aIter[i]); - nLive++; - } - } + return rc; +} - while( nLive>0 ){ - LcsIterator *pAdv = 0; /* The iterator to advance by one position */ - int nThisLcs = 0; /* LCS for the current iterator positions */ - for(i=0; i<pInfo->nPhrase; i++){ - LcsIterator *pIter = &aIter[i]; - if( pIter->pRead==0 ){ - /* This iterator is already at EOF for this column. */ - nThisLcs = 0; - }else{ - if( pAdv==0 || pIter->iPos<pAdv->iPos ){ - pAdv = pIter; - } - if( nThisLcs==0 || pIter->iPos==pIter[-1].iPos ){ - nThisLcs++; - }else{ - nThisLcs = 1; - } - if( nThisLcs>nLcs ) nLcs = nThisLcs; - } +/* +** Free the RtreeCursor.aConstraint[] array and its contents. +*/ +static void freeCursorConstraints(RtreeCursor *pCsr){ + if( pCsr->aConstraint ){ + int i; /* Used to iterate through constraint array */ + for(i=0; i<pCsr->nConstraint; i++){ + sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; + if( pInfo ){ + if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); + sqlite3_free(pInfo); } - if( fts3LcsIteratorAdvance(pAdv) ) nLive--; } - - pInfo->aMatchinfo[iCol] = nLcs; + sqlite3_free(pCsr->aConstraint); + pCsr->aConstraint = 0; } +} - sqlite3_free(aIter); +/* +** Rtree virtual table module xClose method. +*/ +static int rtreeClose(sqlite3_vtab_cursor *cur){ + Rtree *pRtree = (Rtree *)(cur->pVtab); + int ii; + RtreeCursor *pCsr = (RtreeCursor *)cur; + freeCursorConstraints(pCsr); + sqlite3_free(pCsr->aPoint); + for(ii=0; ii<RTREE_CACHE_SZ; ii++) nodeRelease(pRtree, pCsr->aNode[ii]); + sqlite3_free(pCsr); return SQLITE_OK; } /* -** Populate the buffer pInfo->aMatchinfo[] with an array of integers to -** be returned by the matchinfo() function. Argument zArg contains the -** format string passed as the second argument to matchinfo (or the -** default value "pcx" if no second argument was specified). The format -** string has already been validated and the pInfo->aMatchinfo[] array -** is guaranteed to be large enough for the output. -** -** If bGlobal is true, then populate all fields of the matchinfo() output. -** If it is false, then assume that those fields that do not change between -** rows (i.e. FTS3_MATCHINFO_NPHRASE, NCOL, NDOC, AVGLENGTH and part of HITS) -** have already been populated. +** Rtree virtual table module xEof method. ** -** Return SQLITE_OK if successful, or an SQLite error code if an error -** occurs. If a value other than SQLITE_OK is returned, the state the -** pInfo->aMatchinfo[] buffer is left in is undefined. +** Return non-zero if the cursor does not currently point to a valid +** record (i.e if the scan has finished), or zero otherwise. */ -static int fts3MatchinfoValues( - Fts3Cursor *pCsr, /* FTS3 cursor object */ - int bGlobal, /* True to grab the global stats */ - MatchInfo *pInfo, /* Matchinfo context object */ - const char *zArg /* Matchinfo format string */ -){ - int rc = SQLITE_OK; - int i; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_stmt *pSelect = 0; - - for(i=0; rc==SQLITE_OK && zArg[i]; i++){ - - switch( zArg[i] ){ - case FTS3_MATCHINFO_NPHRASE: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nPhrase; - break; - - case FTS3_MATCHINFO_NCOL: - if( bGlobal ) pInfo->aMatchinfo[0] = pInfo->nCol; - break; - - case FTS3_MATCHINFO_NDOC: - if( bGlobal ){ - sqlite3_int64 nDoc = 0; - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, 0); - pInfo->aMatchinfo[0] = (u32)nDoc; - } - break; - - case FTS3_MATCHINFO_AVGLENGTH: - if( bGlobal ){ - sqlite3_int64 nDoc; /* Number of rows in table */ - const char *a; /* Aggregate column length array */ - - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &nDoc, &a); - if( rc==SQLITE_OK ){ - int iCol; - for(iCol=0; iCol<pInfo->nCol; iCol++){ - u32 iVal; - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - iVal = (u32)(((u32)(nToken&0xffffffff)+nDoc/2)/nDoc); - pInfo->aMatchinfo[iCol] = iVal; - } - } - } - break; +static int rtreeEof(sqlite3_vtab_cursor *cur){ + RtreeCursor *pCsr = (RtreeCursor *)cur; + return pCsr->atEOF; +} - case FTS3_MATCHINFO_LENGTH: { - sqlite3_stmt *pSelectDocsize = 0; - rc = sqlite3Fts3SelectDocsize(pTab, pCsr->iPrevId, &pSelectDocsize); - if( rc==SQLITE_OK ){ - int iCol; - const char *a = sqlite3_column_blob(pSelectDocsize, 0); - for(iCol=0; iCol<pInfo->nCol; iCol++){ - sqlite3_int64 nToken; - a += sqlite3Fts3GetVarint(a, &nToken); - pInfo->aMatchinfo[iCol] = (u32)nToken; - } - } - sqlite3_reset(pSelectDocsize); - break; - } +/* +** Convert raw bits from the on-disk RTree record into a coordinate value. +** The on-disk format is big-endian and needs to be converted for little- +** endian platforms. The on-disk record stores integer coordinates if +** eInt is true and it stores 32-bit floating point records if eInt is +** false. a[] is the four bytes of the on-disk record to be decoded. +** Store the results in "r". +** +** There are three versions of this macro, one each for little-endian and +** big-endian processors and a third generic implementation. The endian- +** specific implementations are much faster and are preferred if the +** processor endianness is known at compile-time. The SQLITE_BYTEORDER +** macro is part of sqliteInt.h and hence the endian-specific +** implementation will only be used if this module is compiled as part +** of the amalgamation. +*/ +#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)| \ + ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + memcpy(&c.u,a,4); \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#else +#define RTREE_DECODE_COORD(eInt, a, r) { \ + RtreeCoord c; /* Coordinate decoded */ \ + c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16) \ + +((u32)a[2]<<8) + a[3]; \ + r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ +} +#endif - case FTS3_MATCHINFO_LCS: - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc==SQLITE_OK ){ - rc = fts3MatchinfoLcs(pCsr, pInfo); - } - break; +/* +** Check the RTree node or entry given by pCellData and p against the MATCH +** constraint pConstraint. +*/ +static int rtreeCallbackConstraint( + RtreeConstraint *pConstraint, /* The constraint to test */ + int eInt, /* True if RTree holding integer coordinates */ + u8 *pCellData, /* Raw cell content */ + RtreeSearchPoint *pSearch, /* Container of this cell */ + sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ + int *peWithin /* OUT: visibility of the cell */ +){ + int i; /* Loop counter */ + sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ + int nCoord = pInfo->nCoord; /* No. of coordinates */ + int rc; /* Callback return code */ + sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ - case FTS3_MATCHINFO_LHITS: - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprLHitsCb, (void*)pInfo); - break; + assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); + assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); - default: { - Fts3Expr *pExpr; - assert( zArg[i]==FTS3_MATCHINFO_HITS ); - pExpr = pCsr->pExpr; - rc = fts3ExprLoadDoclists(pCsr, 0, 0); - if( rc!=SQLITE_OK ) break; - if( bGlobal ){ - if( pCsr->pDeferred ){ - rc = fts3MatchinfoSelectDoctotal(pTab, &pSelect, &pInfo->nDoc, 0); - if( rc!=SQLITE_OK ) break; - } - rc = fts3ExprIterate(pExpr, fts3ExprGlobalHitsCb,(void*)pInfo); - if( rc!=SQLITE_OK ) break; - } - (void)fts3ExprIterate(pExpr, fts3ExprLocalHitsCb,(void*)pInfo); - break; - } + if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ + pInfo->iRowid = readInt64(pCellData); + } + pCellData += 8; + for(i=0; i<nCoord; i++, pCellData += 4){ + RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]); + } + if( pConstraint->op==RTREE_MATCH ){ + rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, + nCoord, aCoord, &i); + if( i==0 ) *peWithin = NOT_WITHIN; + *prScore = RTREE_ZERO; + }else{ + pInfo->aCoord = aCoord; + pInfo->iLevel = pSearch->iLevel - 1; + pInfo->rScore = pInfo->rParentScore = pSearch->rScore; + pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; + rc = pConstraint->u.xQueryFunc(pInfo); + if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; + if( pInfo->rScore<*prScore || *prScore<RTREE_ZERO ){ + *prScore = pInfo->rScore; } - - pInfo->aMatchinfo += fts3MatchinfoSize(pInfo, zArg[i]); } - - sqlite3_reset(pSelect); return rc; } - -/* -** Populate pCsr->aMatchinfo[] with data for the current row. The -** 'matchinfo' data is an array of 32-bit unsigned integers (C type u32). +/* +** Check the internal RTree node given by pCellData against constraint p. +** If this constraint cannot be satisfied by any child within the node, +** set *peWithin to NOT_WITHIN. */ -static int fts3GetMatchinfo( - Fts3Cursor *pCsr, /* FTS3 Cursor object */ - const char *zArg /* Second argument to matchinfo() function */ +static void rtreeNonleafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ ){ - MatchInfo sInfo; - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int bGlobal = 0; /* Collect 'global' stats as well as local */ + sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ - memset(&sInfo, 0, sizeof(MatchInfo)); - sInfo.pCursor = pCsr; - sInfo.nCol = pTab->nColumn; + /* p->iCoord might point to either a lower or upper bound coordinate + ** in a coordinate pair. But make pCellData point to the lower bound. + */ + pCellData += 8 + 4*(p->iCoord&0xfe); - /* If there is cached matchinfo() data, but the format string for the - ** cache does not match the format string for this request, discard - ** the cached data. */ - if( pCsr->zMatchinfo && strcmp(pCsr->zMatchinfo, zArg) ){ - assert( pCsr->aMatchinfo ); - sqlite3_free(pCsr->aMatchinfo); - pCsr->zMatchinfo = 0; - pCsr->aMatchinfo = 0; + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ ); + switch( p->op ){ + case RTREE_LE: + case RTREE_LT: + case RTREE_EQ: + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the lower bound of the coordinate pair */ + if( p->u.rValue>=val ) return; + if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ + /* Fall through for the RTREE_EQ case */ + + default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ + pCellData += 4; + RTREE_DECODE_COORD(eInt, pCellData, val); + /* val now holds the upper bound of the coordinate pair */ + if( p->u.rValue<=val ) return; } + *peWithin = NOT_WITHIN; +} - /* If Fts3Cursor.aMatchinfo[] is NULL, then this is the first time the - ** matchinfo function has been called for this query. In this case - ** allocate the array used to accumulate the matchinfo data and - ** initialize those elements that are constant for every row. - */ - if( pCsr->aMatchinfo==0 ){ - int nMatchinfo = 0; /* Number of u32 elements in match-info */ - int nArg; /* Bytes in zArg */ - int i; /* Used to iterate through zArg */ +/* +** Check the leaf RTree cell given by pCellData against constraint p. +** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. +** If the constraint is satisfied, leave *peWithin unchanged. +** +** The constraint is of the form: xN op $val +** +** The op is given by p->op. The xN is p->iCoord-th coordinate in +** pCellData. $val is given by p->u.rValue. +*/ +static void rtreeLeafConstraint( + RtreeConstraint *p, /* The constraint to test */ + int eInt, /* True if RTree holds integer coordinates */ + u8 *pCellData, /* Raw cell content as appears on disk */ + int *peWithin /* Adjust downward, as appropriate */ +){ + RtreeDValue xN; /* Coordinate value converted to a double */ - /* Determine the number of phrases in the query */ - pCsr->nPhrase = fts3ExprPhraseCount(pCsr->pExpr); - sInfo.nPhrase = pCsr->nPhrase; + assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE + || p->op==RTREE_GT || p->op==RTREE_EQ ); + pCellData += 8 + p->iCoord*4; + RTREE_DECODE_COORD(eInt, pCellData, xN); + switch( p->op ){ + case RTREE_LE: if( xN <= p->u.rValue ) return; break; + case RTREE_LT: if( xN < p->u.rValue ) return; break; + case RTREE_GE: if( xN >= p->u.rValue ) return; break; + case RTREE_GT: if( xN > p->u.rValue ) return; break; + default: if( xN == p->u.rValue ) return; break; + } + *peWithin = NOT_WITHIN; +} - /* Determine the number of integers in the buffer returned by this call. */ - for(i=0; zArg[i]; i++){ - nMatchinfo += fts3MatchinfoSize(&sInfo, zArg[i]); +/* +** One of the cells in node pNode is guaranteed to have a 64-bit +** integer value equal to iRowid. Return the index of this cell. +*/ +static int nodeRowidIndex( + Rtree *pRtree, + RtreeNode *pNode, + i64 iRowid, + int *piIndex +){ + int ii; + int nCell = NCELL(pNode); + assert( nCell<200 ); + for(ii=0; ii<nCell; ii++){ + if( nodeGetRowid(pRtree, pNode, ii)==iRowid ){ + *piIndex = ii; + return SQLITE_OK; } - - /* Allocate space for Fts3Cursor.aMatchinfo[] and Fts3Cursor.zMatchinfo. */ - nArg = (int)strlen(zArg); - pCsr->aMatchinfo = (u32 *)sqlite3_malloc(sizeof(u32)*nMatchinfo + nArg + 1); - if( !pCsr->aMatchinfo ) return SQLITE_NOMEM; - - pCsr->zMatchinfo = (char *)&pCsr->aMatchinfo[nMatchinfo]; - pCsr->nMatchinfo = nMatchinfo; - memcpy(pCsr->zMatchinfo, zArg, nArg+1); - memset(pCsr->aMatchinfo, 0, sizeof(u32)*nMatchinfo); - pCsr->isMatchinfoNeeded = 1; - bGlobal = 1; } + return SQLITE_CORRUPT_VTAB; +} - sInfo.aMatchinfo = pCsr->aMatchinfo; - sInfo.nPhrase = pCsr->nPhrase; - if( pCsr->isMatchinfoNeeded ){ - rc = fts3MatchinfoValues(pCsr, bGlobal, &sInfo, zArg); - pCsr->isMatchinfoNeeded = 0; +/* +** Return the index of the cell containing a pointer to node pNode +** in its parent. If pNode is the root node, return -1. +*/ +static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){ + RtreeNode *pParent = pNode->pParent; + if( pParent ){ + return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); } - - return rc; + *piIndex = -1; + return SQLITE_OK; } /* -** Implementation of snippet() function. +** Compare two search points. Return negative, zero, or positive if the first +** is less than, equal to, or greater than the second. +** +** The rScore is the primary key. Smaller rScore values come first. +** If the rScore is a tie, then use iLevel as the tie breaker with smaller +** iLevel values coming first. In this way, if rScore is the same for all +** SearchPoints, then iLevel becomes the deciding factor and the result +** is a depth-first search, which is the desired default behavior. */ -SQLITE_PRIVATE void sqlite3Fts3Snippet( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr, /* Cursor object */ - const char *zStart, /* Snippet start text - "<b>" */ - const char *zEnd, /* Snippet end text - "</b>" */ - const char *zEllipsis, /* Snippet ellipsis text - "<b>...</b>" */ - int iCol, /* Extract snippet from this column */ - int nToken /* Approximate number of tokens in snippet */ +static int rtreeSearchPointCompare( + const RtreeSearchPoint *pA, + const RtreeSearchPoint *pB ){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc = SQLITE_OK; - int i; - StrBuffer res = {0, 0, 0}; - - /* The returned text includes up to four fragments of text extracted from - ** the data in the current row. The first iteration of the for(...) loop - ** below attempts to locate a single fragment of text nToken tokens in - ** size that contains at least one instance of all phrases in the query - ** expression that appear in the current row. If such a fragment of text - ** cannot be found, the second iteration of the loop attempts to locate - ** a pair of fragments, and so on. - */ - int nSnippet = 0; /* Number of fragments in this snippet */ - SnippetFragment aSnippet[4]; /* Maximum of 4 fragments per snippet */ - int nFToken = -1; /* Number of tokens in each fragment */ + if( pA->rScore<pB->rScore ) return -1; + if( pA->rScore>pB->rScore ) return +1; + if( pA->iLevel<pB->iLevel ) return -1; + if( pA->iLevel>pB->iLevel ) return +1; + return 0; +} - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; +/* +** Interchange to search points in a cursor. +*/ +static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ + RtreeSearchPoint t = p->aPoint[i]; + assert( i<j ); + p->aPoint[i] = p->aPoint[j]; + p->aPoint[j] = t; + i++; j++; + if( i<RTREE_CACHE_SZ ){ + if( j>=RTREE_CACHE_SZ ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + }else{ + RtreeNode *pTemp = p->aNode[i]; + p->aNode[i] = p->aNode[j]; + p->aNode[j] = pTemp; + } } +} - for(nSnippet=1; 1; nSnippet++){ - - int iSnip; /* Loop counter 0..nSnippet-1 */ - u64 mCovered = 0; /* Bitmask of phrases covered by snippet */ - u64 mSeen = 0; /* Bitmask of phrases seen by BestSnippet() */ +/* +** Return the search point with the lowest current score. +*/ +static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ + return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; +} - if( nToken>=0 ){ - nFToken = (nToken+nSnippet-1) / nSnippet; - }else{ - nFToken = -1 * nToken; - } +/* +** Get the RtreeNode for the search point with the lowest score. +*/ +static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ + sqlite3_int64 id; + int ii = 1 - pCur->bPoint; + assert( ii==0 || ii==1 ); + assert( pCur->bPoint || pCur->nPoint ); + if( pCur->aNode[ii]==0 ){ + assert( pRC!=0 ); + id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; + *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); + } + return pCur->aNode[ii]; +} - for(iSnip=0; iSnip<nSnippet; iSnip++){ - int iBestScore = -1; /* Best score of columns checked so far */ - int iRead; /* Used to iterate through columns */ - SnippetFragment *pFragment = &aSnippet[iSnip]; +/* +** Push a new element onto the priority queue +*/ +static RtreeSearchPoint *rtreeEnqueue( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ +){ + int i, j; + RtreeSearchPoint *pNew; + if( pCur->nPoint>=pCur->nPointAlloc ){ + int nNew = pCur->nPointAlloc*2 + 8; + pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); + if( pNew==0 ) return 0; + pCur->aPoint = pNew; + pCur->nPointAlloc = nNew; + } + i = pCur->nPoint++; + pNew = pCur->aPoint + i; + pNew->rScore = rScore; + pNew->iLevel = iLevel; + assert( iLevel<=RTREE_MAX_DEPTH ); + while( i>0 ){ + RtreeSearchPoint *pParent; + j = (i-1)/2; + pParent = pCur->aPoint + j; + if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; + rtreeSearchPointSwap(pCur, j, i); + i = j; + pNew = pParent; + } + return pNew; +} - memset(pFragment, 0, sizeof(*pFragment)); +/* +** Allocate a new RtreeSearchPoint and return a pointer to it. Return +** NULL if malloc fails. +*/ +static RtreeSearchPoint *rtreeSearchPointNew( + RtreeCursor *pCur, /* The cursor */ + RtreeDValue rScore, /* Score for the new search point */ + u8 iLevel /* Level for the new search point */ +){ + RtreeSearchPoint *pNew, *pFirst; + pFirst = rtreeSearchPointFirst(pCur); + pCur->anQueue[iLevel]++; + if( pFirst==0 + || pFirst->rScore>rScore + || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) + ){ + if( pCur->bPoint ){ + int ii; + pNew = rtreeEnqueue(pCur, rScore, iLevel); + if( pNew==0 ) return 0; + ii = (int)(pNew - pCur->aPoint) + 1; + if( ii<RTREE_CACHE_SZ ){ + assert( pCur->aNode[ii]==0 ); + pCur->aNode[ii] = pCur->aNode[0]; + }else{ + nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); + } + pCur->aNode[0] = 0; + *pNew = pCur->sPoint; + } + pCur->sPoint.rScore = rScore; + pCur->sPoint.iLevel = iLevel; + pCur->bPoint = 1; + return &pCur->sPoint; + }else{ + return rtreeEnqueue(pCur, rScore, iLevel); + } +} - /* Loop through all columns of the table being considered for snippets. - ** If the iCol argument to this function was negative, this means all - ** columns of the FTS3 table. Otherwise, only column iCol is considered. - */ - for(iRead=0; iRead<pTab->nColumn; iRead++){ - SnippetFragment sF = {0, 0, 0, 0}; - int iS = 0; - if( iCol>=0 && iRead!=iCol ) continue; +#if 0 +/* Tracing routines for the RtreeSearchPoint queue */ +static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){ + if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); } + printf(" %d.%05lld.%02d %g %d", + p->iLevel, p->id, p->iCell, p->rScore, p->eWithin + ); + idx++; + if( idx<RTREE_CACHE_SZ ){ + printf(" %p\n", pCur->aNode[idx]); + }else{ + printf("\n"); + } +} +static void traceQueue(RtreeCursor *pCur, const char *zPrefix){ + int ii; + printf("=== %9s ", zPrefix); + if( pCur->bPoint ){ + tracePoint(&pCur->sPoint, -1, pCur); + } + for(ii=0; ii<pCur->nPoint; ii++){ + if( ii>0 || pCur->bPoint ) printf(" "); + tracePoint(&pCur->aPoint[ii], ii, pCur); + } +} +# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B) +#else +# define RTREE_QUEUE_TRACE(A,B) /* no-op */ +#endif - /* Find the best snippet of nFToken tokens in column iRead. */ - rc = fts3BestSnippet(nFToken, pCsr, iRead, mCovered, &mSeen, &sF, &iS); - if( rc!=SQLITE_OK ){ - goto snippet_out; +/* Remove the search point with the lowest current score. +*/ +static void rtreeSearchPointPop(RtreeCursor *p){ + int i, j, k, n; + i = 1 - p->bPoint; + assert( i==0 || i==1 ); + if( p->aNode[i] ){ + nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); + p->aNode[i] = 0; + } + if( p->bPoint ){ + p->anQueue[p->sPoint.iLevel]--; + p->bPoint = 0; + }else if( p->nPoint ){ + p->anQueue[p->aPoint[0].iLevel]--; + n = --p->nPoint; + p->aPoint[0] = p->aPoint[n]; + if( n<RTREE_CACHE_SZ-1 ){ + p->aNode[1] = p->aNode[n+1]; + p->aNode[n+1] = 0; + } + i = 0; + while( (j = i*2+1)<n ){ + k = j+1; + if( k<n && rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[j])<0 ){ + if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, k); + i = k; + }else{ + break; } - if( iS>iBestScore ){ - *pFragment = sF; - iBestScore = iS; + }else{ + if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ + rtreeSearchPointSwap(p, i, j); + i = j; + }else{ + break; } } - - mCovered |= pFragment->covered; } - - /* If all query phrases seen by fts3BestSnippet() are present in at least - ** one of the nSnippet snippet fragments, break out of the loop. - */ - assert( (mCovered&mSeen)==mCovered ); - if( mSeen==mCovered || nSnippet==SizeofArray(aSnippet) ) break; } +} - assert( nFToken>0 ); - for(i=0; i<nSnippet && rc==SQLITE_OK; i++){ - rc = fts3SnippetText(pCsr, &aSnippet[i], - i, (i==nSnippet-1), nFToken, zStart, zEnd, zEllipsis, &res - ); - } +/* +** Continue the search on cursor pCur until the front of the queue +** contains an entry suitable for returning as a result-set row, +** or until the RtreeSearchPoint queue is empty, indicating that the +** query has completed. +*/ +static int rtreeStepToLeaf(RtreeCursor *pCur){ + RtreeSearchPoint *p; + Rtree *pRtree = RTREE_OF_CURSOR(pCur); + RtreeNode *pNode; + int eWithin; + int rc = SQLITE_OK; + int nCell; + int nConstraint = pCur->nConstraint; + int ii; + int eInt; + RtreeSearchPoint x; - snippet_out: - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, -1, sqlite3_free); + eInt = pRtree->eCoordType==RTREE_COORD_INT32; + while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ + pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); + if( rc ) return rc; + nCell = NCELL(pNode); + assert( nCell<200 ); + while( p->iCell<nCell ){ + sqlite3_rtree_dbl rScore = (sqlite3_rtree_dbl)-1; + u8 *pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell); + eWithin = FULLY_WITHIN; + for(ii=0; ii<nConstraint; ii++){ + RtreeConstraint *pConstraint = pCur->aConstraint + ii; + if( pConstraint->op>=RTREE_MATCH ){ + rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, + &rScore, &eWithin); + if( rc ) return rc; + }else if( p->iLevel==1 ){ + rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); + }else{ + rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); + } + if( eWithin==NOT_WITHIN ) break; + } + p->iCell++; + if( eWithin==NOT_WITHIN ) continue; + x.iLevel = p->iLevel - 1; + if( x.iLevel ){ + x.id = readInt64(pCellData); + x.iCell = 0; + }else{ + x.id = p->id; + x.iCell = p->iCell - 1; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-S:"); + rtreeSearchPointPop(pCur); + } + if( rScore<RTREE_ZERO ) rScore = RTREE_ZERO; + p = rtreeSearchPointNew(pCur, rScore, x.iLevel); + if( p==0 ) return SQLITE_NOMEM; + p->eWithin = eWithin; + p->id = x.id; + p->iCell = x.iCell; + RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); + break; + } + if( p->iCell>=nCell ){ + RTREE_QUEUE_TRACE(pCur, "POP-Se:"); + rtreeSearchPointPop(pCur); + } } + pCur->atEOF = p==0; + return SQLITE_OK; } +/* +** Rtree virtual table module xNext method. +*/ +static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + int rc = SQLITE_OK; -typedef struct TermOffset TermOffset; -typedef struct TermOffsetCtx TermOffsetCtx; - -struct TermOffset { - char *pList; /* Position-list */ - int iPos; /* Position just read from pList */ - int iOff; /* Offset of this term from read positions */ -}; + /* Move to the next entry that matches the configured constraints. */ + RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); + rtreeSearchPointPop(pCsr); + rc = rtreeStepToLeaf(pCsr); + return rc; +} -struct TermOffsetCtx { - Fts3Cursor *pCsr; - int iCol; /* Column of table to populate aTerm for */ - int iTerm; - sqlite3_int64 iDocid; - TermOffset *aTerm; -}; +/* +** Rtree virtual table module xRowid method. +*/ +static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + if( rc==SQLITE_OK && p ){ + *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); + } + return rc; +} -/* -** This function is an fts3ExprIterate() callback used by sqlite3Fts3Offsets(). +/* +** Rtree virtual table module xColumn method. */ -static int fts3ExprTermOffsetInit(Fts3Expr *pExpr, int iPhrase, void *ctx){ - TermOffsetCtx *p = (TermOffsetCtx *)ctx; - int nTerm; /* Number of tokens in phrase */ - int iTerm; /* For looping through nTerm phrase terms */ - char *pList; /* Pointer to position list for phrase */ - int iPos = 0; /* First position in position-list */ - int rc; +static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ + Rtree *pRtree = (Rtree *)cur->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)cur; + RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); + RtreeCoord c; + int rc = SQLITE_OK; + RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); - UNUSED_PARAMETER(iPhrase); - rc = sqlite3Fts3EvalPhrasePoslist(p->pCsr, pExpr, p->iCol, &pList); - nTerm = pExpr->pPhrase->nToken; - if( pList ){ - fts3GetDeltaPosition(&pList, &iPos); - assert( iPos>=0 ); + if( rc ) return rc; + if( p==0 ) return SQLITE_OK; + if( i==0 ){ + sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); + }else{ + if( rc ) return rc; + nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + sqlite3_result_double(ctx, c.f); + }else +#endif + { + assert( pRtree->eCoordType==RTREE_COORD_INT32 ); + sqlite3_result_int(ctx, c.i); + } } + return SQLITE_OK; +} - for(iTerm=0; iTerm<nTerm; iTerm++){ - TermOffset *pT = &p->aTerm[p->iTerm++]; - pT->iOff = nTerm-iTerm-1; - pT->pList = pList; - pT->iPos = iPos; +/* +** Use nodeAcquire() to obtain the leaf node containing the record with +** rowid iRowid. If successful, set *ppLeaf to point to the node and +** return SQLITE_OK. If there is no such record in the table, set +** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf +** to zero and return an SQLite error code. +*/ +static int findLeafNode( + Rtree *pRtree, /* RTree to search */ + i64 iRowid, /* The rowid searching for */ + RtreeNode **ppLeaf, /* Write the node here */ + sqlite3_int64 *piNode /* Write the node-id here */ +){ + int rc; + *ppLeaf = 0; + sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); + if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ + i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); + if( piNode ) *piNode = iNode; + rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); + sqlite3_reset(pRtree->pReadRowid); + }else{ + rc = sqlite3_reset(pRtree->pReadRowid); } - return rc; } /* -** Implementation of offsets() function. +** This function is called to configure the RtreeConstraint object passed +** as the second argument for a MATCH constraint. The value passed as the +** first argument to this function is the right-hand operand to the MATCH +** operator. */ -SQLITE_PRIVATE void sqlite3Fts3Offsets( - sqlite3_context *pCtx, /* SQLite function call context */ - Fts3Cursor *pCsr /* Cursor object */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - sqlite3_tokenizer_module const *pMod = pTab->pTokenizer->pModule; - int rc; /* Return Code */ - int nToken; /* Number of tokens in query */ - int iCol; /* Column currently being processed */ - StrBuffer res = {0, 0, 0}; /* Result string */ - TermOffsetCtx sCtx; /* Context for fts3ExprTermOffsetInit() */ - - if( !pCsr->pExpr ){ - sqlite3_result_text(pCtx, "", 0, SQLITE_STATIC); - return; - } - - memset(&sCtx, 0, sizeof(sCtx)); - assert( pCsr->isRequireSeek==0 ); +static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ + RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ + sqlite3_rtree_query_info *pInfo; /* Callback information */ + int nBlob; /* Size of the geometry function blob */ + int nExpected; /* Expected size of the BLOB */ - /* Count the number of terms in the query */ - rc = fts3ExprLoadDoclists(pCsr, 0, &nToken); - if( rc!=SQLITE_OK ) goto offsets_out; + /* Check that value is actually a blob. */ + if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; - /* Allocate the array of TermOffset iterators. */ - sCtx.aTerm = (TermOffset *)sqlite3_malloc(sizeof(TermOffset)*nToken); - if( 0==sCtx.aTerm ){ - rc = SQLITE_NOMEM; - goto offsets_out; + /* Check that the blob is roughly the right size. */ + nBlob = sqlite3_value_bytes(pValue); + if( nBlob<(int)sizeof(RtreeMatchArg) ){ + return SQLITE_ERROR; } - sCtx.iDocid = pCsr->iPrevId; - sCtx.pCsr = pCsr; - /* Loop through the table columns, appending offset information to - ** string-buffer res for each column. - */ - for(iCol=0; iCol<pTab->nColumn; iCol++){ - sqlite3_tokenizer_cursor *pC; /* Tokenizer cursor */ - const char *ZDUMMY; /* Dummy argument used with xNext() */ - int NDUMMY = 0; /* Dummy argument used with xNext() */ - int iStart = 0; - int iEnd = 0; - int iCurrent = 0; - const char *zDoc; - int nDoc; + pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); + if( !pInfo ) return SQLITE_NOMEM; + memset(pInfo, 0, sizeof(*pInfo)); + pBlob = (RtreeMatchArg*)&pInfo[1]; - /* Initialize the contents of sCtx.aTerm[] for column iCol. There is - ** no way that this operation can fail, so the return code from - ** fts3ExprIterate() can be discarded. - */ - sCtx.iCol = iCol; - sCtx.iTerm = 0; - (void)fts3ExprIterate(pCsr->pExpr, fts3ExprTermOffsetInit, (void *)&sCtx); + memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); + nExpected = (int)(sizeof(RtreeMatchArg) + + pBlob->nParam*sizeof(sqlite3_value*) + + (pBlob->nParam-1)*sizeof(RtreeDValue)); + if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ + sqlite3_free(pInfo); + return SQLITE_ERROR; + } + pInfo->pContext = pBlob->cb.pContext; + pInfo->nParam = pBlob->nParam; + pInfo->aParam = pBlob->aParam; + pInfo->apSqlParam = pBlob->apSqlParam; - /* Retreive the text stored in column iCol. If an SQL NULL is stored - ** in column iCol, jump immediately to the next iteration of the loop. - ** If an OOM occurs while retrieving the data (this can happen if SQLite - ** needs to transform the data from utf-16 to utf-8), return SQLITE_NOMEM - ** to the caller. - */ - zDoc = (const char *)sqlite3_column_text(pCsr->pStmt, iCol+1); - nDoc = sqlite3_column_bytes(pCsr->pStmt, iCol+1); - if( zDoc==0 ){ - if( sqlite3_column_type(pCsr->pStmt, iCol+1)==SQLITE_NULL ){ - continue; - } - rc = SQLITE_NOMEM; - goto offsets_out; - } + if( pBlob->cb.xGeom ){ + pCons->u.xGeom = pBlob->cb.xGeom; + }else{ + pCons->op = RTREE_QUERY; + pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; + } + pCons->pInfo = pInfo; + return SQLITE_OK; +} - /* Initialize a tokenizer iterator to iterate through column iCol. */ - rc = sqlite3Fts3OpenTokenizer(pTab->pTokenizer, pCsr->iLangid, - zDoc, nDoc, &pC - ); - if( rc!=SQLITE_OK ) goto offsets_out; +/* +** Rtree virtual table module xFilter method. +*/ +static int rtreeFilter( + sqlite3_vtab_cursor *pVtabCursor, + int idxNum, const char *idxStr, + int argc, sqlite3_value **argv +){ + Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; + RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; + RtreeNode *pRoot = 0; + int ii; + int rc = SQLITE_OK; + int iCell = 0; - rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - while( rc==SQLITE_OK ){ - int i; /* Used to loop through terms */ - int iMinPos = 0x7FFFFFFF; /* Position of next token */ - TermOffset *pTerm = 0; /* TermOffset associated with next token */ + rtreeReference(pRtree); - for(i=0; i<nToken; i++){ - TermOffset *pT = &sCtx.aTerm[i]; - if( pT->pList && (pT->iPos-pT->iOff)<iMinPos ){ - iMinPos = pT->iPos-pT->iOff; - pTerm = pT; - } - } + /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ + freeCursorConstraints(pCsr); + sqlite3_free(pCsr->aPoint); + memset(pCsr, 0, sizeof(RtreeCursor)); + pCsr->base.pVtab = (sqlite3_vtab*)pRtree; - if( !pTerm ){ - /* All offsets for this column have been gathered. */ - rc = SQLITE_DONE; + pCsr->iStrategy = idxNum; + if( idxNum==1 ){ + /* Special case - lookup by rowid. */ + RtreeNode *pLeaf; /* Leaf on which the required cell resides */ + RtreeSearchPoint *p; /* Search point for the the leaf */ + i64 iRowid = sqlite3_value_int64(argv[0]); + i64 iNode = 0; + rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); + if( rc==SQLITE_OK && pLeaf!=0 ){ + p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); + assert( p!=0 ); /* Always returns pCsr->sPoint */ + pCsr->aNode[0] = pLeaf; + p->id = iNode; + p->eWithin = PARTLY_WITHIN; + rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); + p->iCell = iCell; + RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); + }else{ + pCsr->atEOF = 1; + } + }else{ + /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array + ** with the configured constraints. + */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); + if( rc==SQLITE_OK && argc>0 ){ + pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); + pCsr->nConstraint = argc; + if( !pCsr->aConstraint ){ + rc = SQLITE_NOMEM; }else{ - assert( iCurrent<=iMinPos ); - if( 0==(0xFE&*pTerm->pList) ){ - pTerm->pList = 0; - }else{ - fts3GetDeltaPosition(&pTerm->pList, &pTerm->iPos); - } - while( rc==SQLITE_OK && iCurrent<iMinPos ){ - rc = pMod->xNext(pC, &ZDUMMY, &NDUMMY, &iStart, &iEnd, &iCurrent); - } - if( rc==SQLITE_OK ){ - char aBuffer[64]; - sqlite3_snprintf(sizeof(aBuffer), aBuffer, - "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart - ); - rc = fts3StringAppend(&res, aBuffer, -1); - }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){ - rc = FTS_CORRUPT_VTAB; + memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); + memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); + assert( (idxStr==0 && argc==0) + || (idxStr && (int)strlen(idxStr)==argc*2) ); + for(ii=0; ii<argc; ii++){ + RtreeConstraint *p = &pCsr->aConstraint[ii]; + p->op = idxStr[ii*2]; + p->iCoord = idxStr[ii*2+1]-'0'; + if( p->op>=RTREE_MATCH ){ + /* A MATCH operator. The right-hand-side must be a blob that + ** can be cast into an RtreeMatchArg object. One created using + ** an sqlite3_rtree_geometry_callback() SQL user function. + */ + rc = deserializeGeometry(argv[ii], p); + if( rc!=SQLITE_OK ){ + break; + } + p->pInfo->nCoord = pRtree->nDim*2; + p->pInfo->anQueue = pCsr->anQueue; + p->pInfo->mxLevel = pRtree->iDepth + 1; + }else{ +#ifdef SQLITE_RTREE_INT_ONLY + p->u.rValue = sqlite3_value_int64(argv[ii]); +#else + p->u.rValue = sqlite3_value_double(argv[ii]); +#endif + } } } } - if( rc==SQLITE_DONE ){ - rc = SQLITE_OK; + if( rc==SQLITE_OK ){ + RtreeSearchPoint *pNew; + pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1); + if( pNew==0 ) return SQLITE_NOMEM; + pNew->id = 1; + pNew->iCell = 0; + pNew->eWithin = PARTLY_WITHIN; + assert( pCsr->bPoint==1 ); + pCsr->aNode[0] = pRoot; + pRoot = 0; + RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); + rc = rtreeStepToLeaf(pCsr); } - - pMod->xClose(pC); - if( rc!=SQLITE_OK ) goto offsets_out; } - offsets_out: - sqlite3_free(sCtx.aTerm); - assert( rc!=SQLITE_DONE ); - sqlite3Fts3SegmentsClose(pTab); - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pCtx, rc); - sqlite3_free(res.z); - }else{ - sqlite3_result_text(pCtx, res.z, res.n-1, sqlite3_free); - } - return; + nodeRelease(pRtree, pRoot); + rtreeRelease(pRtree); + return rc; } /* -** Implementation of matchinfo() function. +** Set the pIdxInfo->estimatedRows variable to nRow. Unless this +** extension is currently being used by a version of SQLite too old to +** support estimatedRows. In that case this function is a no-op. */ -SQLITE_PRIVATE void sqlite3Fts3Matchinfo( - sqlite3_context *pContext, /* Function call context */ - Fts3Cursor *pCsr, /* FTS3 table cursor */ - const char *zArg /* Second arg to matchinfo() function */ -){ - Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab; - int rc; - int i; - const char *zFormat; - - if( zArg ){ - for(i=0; zArg[i]; i++){ - char *zErr = 0; - if( fts3MatchinfoCheck(pTab, zArg[i], &zErr) ){ - sqlite3_result_error(pContext, zErr, -1); - sqlite3_free(zErr); - return; - } - } - zFormat = zArg; - }else{ - zFormat = FTS3_MATCHINFO_DEFAULT; - } - - if( !pCsr->pExpr ){ - sqlite3_result_blob(pContext, "", 0, SQLITE_STATIC); - return; - } - - /* Retrieve matchinfo() data. */ - rc = fts3GetMatchinfo(pCsr, zFormat); - sqlite3Fts3SegmentsClose(pTab); - - if( rc!=SQLITE_OK ){ - sqlite3_result_error_code(pContext, rc); - }else{ - int n = pCsr->nMatchinfo * sizeof(u32); - sqlite3_result_blob(pContext, pCsr->aMatchinfo, n, SQLITE_TRANSIENT); +static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ +#if SQLITE_VERSION_NUMBER>=3008002 + if( sqlite3_libversion_number()>=3008002 ){ + pIdxInfo->estimatedRows = nRow; } -} - #endif +} -/************** End of fts3_snippet.c ****************************************/ -/************** Begin file fts3_unicode.c ************************************/ /* -** 2012 May 24 +** Rtree virtual table module xBestIndex method. There are three +** table scan strategies to choose from (in order from most to +** least desirable): ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** idxNum idxStr Strategy +** ------------------------------------------------ +** 1 Unused Direct lookup by rowid. +** 2 See below R-tree query or full-table scan. +** ------------------------------------------------ ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** If strategy 1 is used, then idxStr is not meaningful. If strategy +** 2 is used, idxStr is formatted to contain 2 bytes for each +** constraint used. The first two bytes of idxStr correspond to +** the constraint in sqlite3_index_info.aConstraintUsage[] with +** (argvIndex==1) etc. ** -****************************************************************************** +** The first of each pair of bytes in idxStr identifies the constraint +** operator as follows: ** -** Implementation of the "unicode" full-text-search tokenizer. -*/ - -#ifndef SQLITE_DISABLE_FTS3_UNICODE - -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) - -/* #include <assert.h> */ -/* #include <stdlib.h> */ -/* #include <stdio.h> */ -/* #include <string.h> */ - - -/* -** The following two macros - READ_UTF8 and WRITE_UTF8 - have been copied -** from the sqlite3 source file utf.c. If this file is compiled as part -** of the amalgamation, they are not required. +** Operator Byte Value +** ---------------------- +** = 0x41 ('A') +** <= 0x42 ('B') +** < 0x43 ('C') +** >= 0x44 ('D') +** > 0x45 ('E') +** MATCH 0x46 ('F') +** ---------------------- +** +** The second of each pair of bytes identifies the coordinate column +** to which the constraint applies. The leftmost coordinate column +** is 'a', the second from the left 'b' etc. */ -#ifndef SQLITE_AMALGAMATION +static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ + Rtree *pRtree = (Rtree*)tab; + int rc = SQLITE_OK; + int ii; + int bMatch = 0; /* True if there exists a MATCH constraint */ + i64 nRow; /* Estimated rows returned by this scan */ -static const unsigned char sqlite3Utf8Trans1[] = { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, -}; + int iIdx = 0; + char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; + memset(zIdxStr, 0, sizeof(zIdxStr)); -#define READ_UTF8(zIn, zTerm, c) \ - c = *(zIn++); \ - if( c>=0xc0 ){ \ - c = sqlite3Utf8Trans1[c-0xc0]; \ - while( zIn!=zTerm && (*zIn & 0xc0)==0x80 ){ \ - c = (c<<6) + (0x3f & *(zIn++)); \ - } \ - if( c<0x80 \ - || (c&0xFFFFF800)==0xD800 \ - || (c&0xFFFFFFFE)==0xFFFE ){ c = 0xFFFD; } \ + /* Check if there exists a MATCH constraint - even an unusable one. If there + ** is, do not consider the lookup-by-rowid plan as using such a plan would + ** require the VDBE to evaluate the MATCH constraint, which is not currently + ** possible. */ + for(ii=0; ii<pIdxInfo->nConstraint; ii++){ + if( pIdxInfo->aConstraint[ii].op==SQLITE_INDEX_CONSTRAINT_MATCH ){ + bMatch = 1; + } } -#define WRITE_UTF8(zOut, c) { \ - if( c<0x00080 ){ \ - *zOut++ = (u8)(c&0xFF); \ - } \ - else if( c<0x00800 ){ \ - *zOut++ = 0xC0 + (u8)((c>>6)&0x1F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - } \ - else if( c<0x10000 ){ \ - *zOut++ = 0xE0 + (u8)((c>>12)&0x0F); \ - *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - }else{ \ - *zOut++ = 0xF0 + (u8)((c>>18) & 0x07); \ - *zOut++ = 0x80 + (u8)((c>>12) & 0x3F); \ - *zOut++ = 0x80 + (u8)((c>>6) & 0x3F); \ - *zOut++ = 0x80 + (u8)(c & 0x3F); \ - } \ -} + assert( pIdxInfo->idxStr==0 ); + for(ii=0; ii<pIdxInfo->nConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ + struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; + + if( bMatch==0 && p->usable + && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ + ){ + /* We have an equality constraint on the rowid. Use strategy 1. */ + int jj; + for(jj=0; jj<ii; jj++){ + pIdxInfo->aConstraintUsage[jj].argvIndex = 0; + pIdxInfo->aConstraintUsage[jj].omit = 0; + } + pIdxInfo->idxNum = 1; + pIdxInfo->aConstraintUsage[ii].argvIndex = 1; + pIdxInfo->aConstraintUsage[jj].omit = 1; -#endif /* ifndef SQLITE_AMALGAMATION */ + /* This strategy involves a two rowid lookups on an B-Tree structures + ** and then a linear search of an R-Tree node. This should be + ** considered almost as quick as a direct rowid lookup (for which + ** sqlite uses an internal cost of 0.0). It is expected to return + ** a single row. + */ + pIdxInfo->estimatedCost = 30.0; + setEstimatedRows(pIdxInfo, 1); + return SQLITE_OK; + } -typedef struct unicode_tokenizer unicode_tokenizer; -typedef struct unicode_cursor unicode_cursor; + if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ + u8 op; + switch( p->op ){ + case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; + case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; + case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; + case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; + case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; + default: + assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); + op = RTREE_MATCH; + break; + } + zIdxStr[iIdx++] = op; + zIdxStr[iIdx++] = p->iColumn - 1 + '0'; + pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); + pIdxInfo->aConstraintUsage[ii].omit = 1; + } + } -struct unicode_tokenizer { - sqlite3_tokenizer base; - int bRemoveDiacritic; - int nException; - int *aiException; -}; + pIdxInfo->idxNum = 2; + pIdxInfo->needToFreeIdxStr = 1; + if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ + return SQLITE_NOMEM; + } -struct unicode_cursor { - sqlite3_tokenizer_cursor base; - const unsigned char *aInput; /* Input text being tokenized */ - int nInput; /* Size of aInput[] in bytes */ - int iOff; /* Current offset within aInput[] */ - int iToken; /* Index of next token to be returned */ - char *zToken; /* storage for current token */ - int nAlloc; /* space allocated at zToken */ -}; + nRow = pRtree->nRowEst / (iIdx + 1); + pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; + setEstimatedRows(pIdxInfo, nRow); + return rc; +} /* -** Destroy a tokenizer allocated by unicodeCreate(). +** Return the N-dimensional volumn of the cell stored in *p. */ -static int unicodeDestroy(sqlite3_tokenizer *pTokenizer){ - if( pTokenizer ){ - unicode_tokenizer *p = (unicode_tokenizer *)pTokenizer; - sqlite3_free(p->aiException); - sqlite3_free(p); +static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ + RtreeDValue area = (RtreeDValue)1; + int ii; + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]))); } - return SQLITE_OK; + return area; } /* -** As part of a tokenchars= or separators= option, the CREATE VIRTUAL TABLE -** statement has specified that the tokenizer for this table shall consider -** all characters in string zIn/nIn to be separators (if bAlnum==0) or -** token characters (if bAlnum==1). -** -** For each codepoint in the zIn/nIn string, this function checks if the -** sqlite3FtsUnicodeIsalnum() function already returns the desired result. -** If so, no action is taken. Otherwise, the codepoint is added to the -** unicode_tokenizer.aiException[] array. For the purposes of tokenization, -** the return value of sqlite3FtsUnicodeIsalnum() is inverted for all -** codepoints in the aiException[] array. -** -** If a standalone diacritic mark (one that sqlite3FtsUnicodeIsdiacritic() -** identifies as a diacritic) occurs in the zIn/nIn string it is ignored. -** It is not possible to change the behavior of the tokenizer with respect -** to these codepoints. +** Return the margin length of cell p. The margin length is the sum +** of the objects size in each dimension. */ -static int unicodeAddExceptions( - unicode_tokenizer *p, /* Tokenizer to add exceptions to */ - int bAlnum, /* Replace Isalnum() return value with this */ - const char *zIn, /* Array of characters to make exceptions */ - int nIn /* Length of z in bytes */ -){ - const unsigned char *z = (const unsigned char *)zIn; - const unsigned char *zTerm = &z[nIn]; - int iCode; - int nEntry = 0; - - assert( bAlnum==0 || bAlnum==1 ); +static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ + RtreeDValue margin = (RtreeDValue)0; + int ii; + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); + } + return margin; +} - while( z<zTerm ){ - READ_UTF8(z, zTerm, iCode); - assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); - if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum - && sqlite3FtsUnicodeIsdiacritic(iCode)==0 - ){ - nEntry++; +/* +** Store the union of cells p1 and p2 in p1. +*/ +static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ + int ii; + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); + p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); + } + }else{ + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); + p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); } } +} - if( nEntry ){ - int *aNew; /* New aiException[] array */ - int nNew; /* Number of valid entries in array aNew[] */ - - aNew = sqlite3_realloc(p->aiException, (p->nException+nEntry)*sizeof(int)); - if( aNew==0 ) return SQLITE_NOMEM; - nNew = p->nException; - - z = (const unsigned char *)zIn; - while( z<zTerm ){ - READ_UTF8(z, zTerm, iCode); - if( sqlite3FtsUnicodeIsalnum(iCode)!=bAlnum - && sqlite3FtsUnicodeIsdiacritic(iCode)==0 - ){ - int i, j; - for(i=0; i<nNew && aNew[i]<iCode; i++); - for(j=nNew; j>i; j--) aNew[j] = aNew[j-1]; - aNew[i] = iCode; - nNew++; - } +/* +** Return true if the area covered by p2 is a subset of the area covered +** by p1. False otherwise. +*/ +static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ + int ii; + int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); + for(ii=0; ii<(pRtree->nDim*2); ii+=2){ + RtreeCoord *a1 = &p1->aCoord[ii]; + RtreeCoord *a2 = &p2->aCoord[ii]; + if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f)) + || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i)) + ){ + return 0; } - p->aiException = aNew; - p->nException = nNew; } - - return SQLITE_OK; + return 1; } /* -** Return true if the p->aiException[] array contains the value iCode. +** Return the amount cell p would grow by if it were unioned with pCell. */ -static int unicodeIsException(unicode_tokenizer *p, int iCode){ - if( p->nException>0 ){ - int *a = p->aiException; - int iLo = 0; - int iHi = p->nException-1; +static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ + RtreeDValue area; + RtreeCell cell; + memcpy(&cell, p, sizeof(RtreeCell)); + area = cellArea(pRtree, &cell); + cellUnion(pRtree, &cell, pCell); + return (cellArea(pRtree, &cell)-area); +} - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - if( iCode==a[iTest] ){ - return 1; - }else if( iCode>a[iTest] ){ - iLo = iTest+1; +static RtreeDValue cellOverlap( + Rtree *pRtree, + RtreeCell *p, + RtreeCell *aCell, + int nCell +){ + int ii; + RtreeDValue overlap = RTREE_ZERO; + for(ii=0; ii<nCell; ii++){ + int jj; + RtreeDValue o = (RtreeDValue)1; + for(jj=0; jj<(pRtree->nDim*2); jj+=2){ + RtreeDValue x1, x2; + x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); + x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); + if( x2<x1 ){ + o = (RtreeDValue)0; + break; }else{ - iHi = iTest-1; + o = o * (x2-x1); } } + overlap += o; } - - return 0; + return overlap; } -/* -** Return true if, for the purposes of tokenization, codepoint iCode is -** considered a token character (not a separator). -*/ -static int unicodeIsAlnum(unicode_tokenizer *p, int iCode){ - assert( (sqlite3FtsUnicodeIsalnum(iCode) & 0xFFFFFFFE)==0 ); - return sqlite3FtsUnicodeIsalnum(iCode) ^ unicodeIsException(p, iCode); -} /* -** Create a new tokenizer instance. +** This function implements the ChooseLeaf algorithm from Gutman[84]. +** ChooseSubTree in r*tree terminology. */ -static int unicodeCreate( - int nArg, /* Size of array argv[] */ - const char * const *azArg, /* Tokenizer creation arguments */ - sqlite3_tokenizer **pp /* OUT: New tokenizer handle */ +static int ChooseLeaf( + Rtree *pRtree, /* Rtree table */ + RtreeCell *pCell, /* Cell to insert into rtree */ + int iHeight, /* Height of sub-tree rooted at pCell */ + RtreeNode **ppLeaf /* OUT: Selected leaf page */ ){ - unicode_tokenizer *pNew; /* New tokenizer object */ - int i; - int rc = SQLITE_OK; + int rc; + int ii; + RtreeNode *pNode; + rc = nodeAcquire(pRtree, 1, 0, &pNode); - pNew = (unicode_tokenizer *) sqlite3_malloc(sizeof(unicode_tokenizer)); - if( pNew==NULL ) return SQLITE_NOMEM; - memset(pNew, 0, sizeof(unicode_tokenizer)); - pNew->bRemoveDiacritic = 1; + for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ + int iCell; + sqlite3_int64 iBest = 0; - for(i=0; rc==SQLITE_OK && i<nArg; i++){ - const char *z = azArg[i]; - int n = (int)strlen(z); + RtreeDValue fMinGrowth = RTREE_ZERO; + RtreeDValue fMinArea = RTREE_ZERO; - if( n==19 && memcmp("remove_diacritics=1", z, 19)==0 ){ - pNew->bRemoveDiacritic = 1; - } - else if( n==19 && memcmp("remove_diacritics=0", z, 19)==0 ){ - pNew->bRemoveDiacritic = 0; - } - else if( n>=11 && memcmp("tokenchars=", z, 11)==0 ){ - rc = unicodeAddExceptions(pNew, 1, &z[11], n-11); - } - else if( n>=11 && memcmp("separators=", z, 11)==0 ){ - rc = unicodeAddExceptions(pNew, 0, &z[11], n-11); - } - else{ - /* Unrecognized argument */ - rc = SQLITE_ERROR; + int nCell = NCELL(pNode); + RtreeCell cell; + RtreeNode *pChild; + + RtreeCell *aCell = 0; + + /* Select the child node which will be enlarged the least if pCell + ** is inserted into it. Resolve ties by choosing the entry with + ** the smallest area. + */ + for(iCell=0; iCell<nCell; iCell++){ + int bBest = 0; + RtreeDValue growth; + RtreeDValue area; + nodeGetCell(pRtree, pNode, iCell, &cell); + growth = cellGrowth(pRtree, &cell, pCell); + area = cellArea(pRtree, &cell); + if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){ + bBest = 1; + } + if( bBest ){ + fMinGrowth = growth; + fMinArea = area; + iBest = cell.iRowid; + } } - } - if( rc!=SQLITE_OK ){ - unicodeDestroy((sqlite3_tokenizer *)pNew); - pNew = 0; + sqlite3_free(aCell); + rc = nodeAcquire(pRtree, iBest, pNode, &pChild); + nodeRelease(pRtree, pNode); + pNode = pChild; } - *pp = (sqlite3_tokenizer *)pNew; + + *ppLeaf = pNode; return rc; } /* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. +** A cell with the same content as pCell has just been inserted into +** the node pNode. This function updates the bounding box cells in +** all ancestor elements. */ -static int unicodeOpen( - sqlite3_tokenizer *p, /* The tokenizer */ - const char *aInput, /* Input string */ - int nInput, /* Size of string aInput in bytes */ - sqlite3_tokenizer_cursor **pp /* OUT: New cursor object */ +static int AdjustTree( + Rtree *pRtree, /* Rtree table */ + RtreeNode *pNode, /* Adjust ancestry of this node. */ + RtreeCell *pCell /* This cell was just inserted */ ){ - unicode_cursor *pCsr; + RtreeNode *p = pNode; + while( p->pParent ){ + RtreeNode *pParent = p->pParent; + RtreeCell cell; + int iCell; - pCsr = (unicode_cursor *)sqlite3_malloc(sizeof(unicode_cursor)); - if( pCsr==0 ){ - return SQLITE_NOMEM; - } - memset(pCsr, 0, sizeof(unicode_cursor)); + if( nodeParentIndex(pRtree, p, &iCell) ){ + return SQLITE_CORRUPT_VTAB; + } - pCsr->aInput = (const unsigned char *)aInput; - if( aInput==0 ){ - pCsr->nInput = 0; - }else if( nInput<0 ){ - pCsr->nInput = (int)strlen(aInput); - }else{ - pCsr->nInput = nInput; + nodeGetCell(pRtree, pParent, iCell, &cell); + if( !cellContains(pRtree, &cell, pCell) ){ + cellUnion(pRtree, &cell, pCell); + nodeOverwriteCell(pRtree, pParent, &cell, iCell); + } + + p = pParent; } - - *pp = &pCsr->base; - UNUSED_PARAMETER(p); return SQLITE_OK; } /* -** Close a tokenization cursor previously opened by a call to -** simpleOpen() above. +** Write mapping (iRowid->iNode) to the <rtree>_rowid table. */ -static int unicodeClose(sqlite3_tokenizer_cursor *pCursor){ - unicode_cursor *pCsr = (unicode_cursor *) pCursor; - sqlite3_free(pCsr->zToken); - sqlite3_free(pCsr); - return SQLITE_OK; +static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ + sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); + sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); + sqlite3_step(pRtree->pWriteRowid); + return sqlite3_reset(pRtree->pWriteRowid); } /* -** Extract the next token from a tokenization cursor. The cursor must -** have been opened by a prior call to simpleOpen(). +** Write mapping (iNode->iPar) to the <rtree>_parent table. */ -static int unicodeNext( - sqlite3_tokenizer_cursor *pC, /* Cursor returned by simpleOpen */ - const char **paToken, /* OUT: Token text */ - int *pnToken, /* OUT: Number of bytes at *paToken */ - int *piStart, /* OUT: Starting offset of token */ - int *piEnd, /* OUT: Ending offset of token */ - int *piPos /* OUT: Position integer of token */ -){ - unicode_cursor *pCsr = (unicode_cursor *)pC; - unicode_tokenizer *p = ((unicode_tokenizer *)pCsr->base.pTokenizer); - int iCode = 0; - char *zOut; - const unsigned char *z = &pCsr->aInput[pCsr->iOff]; - const unsigned char *zStart = z; - const unsigned char *zEnd; - const unsigned char *zTerm = &pCsr->aInput[pCsr->nInput]; - - /* Scan past any delimiter characters before the start of the next token. - ** Return SQLITE_DONE early if this takes us all the way to the end of - ** the input. */ - while( z<zTerm ){ - READ_UTF8(z, zTerm, iCode); - if( unicodeIsAlnum(p, iCode) ) break; - zStart = z; - } - if( zStart>=zTerm ) return SQLITE_DONE; - - zOut = pCsr->zToken; - do { - int iOut; - - /* Grow the output buffer if required. */ - if( (zOut-pCsr->zToken)>=(pCsr->nAlloc-4) ){ - char *zNew = sqlite3_realloc(pCsr->zToken, pCsr->nAlloc+64); - if( !zNew ) return SQLITE_NOMEM; - zOut = &zNew[zOut - pCsr->zToken]; - pCsr->zToken = zNew; - pCsr->nAlloc += 64; - } - - /* Write the folded case of the last character read to the output */ - zEnd = z; - iOut = sqlite3FtsUnicodeFold(iCode, p->bRemoveDiacritic); - if( iOut ){ - WRITE_UTF8(zOut, iOut); - } - - /* If the cursor is not at EOF, read the next character */ - if( z>=zTerm ) break; - READ_UTF8(z, zTerm, iCode); - }while( unicodeIsAlnum(p, iCode) - || sqlite3FtsUnicodeIsdiacritic(iCode) - ); - - /* Set the output variables and return. */ - pCsr->iOff = (int)(z - pCsr->aInput); - *paToken = pCsr->zToken; - *pnToken = (int)(zOut - pCsr->zToken); - *piStart = (int)(zStart - pCsr->aInput); - *piEnd = (int)(zEnd - pCsr->aInput); - *piPos = pCsr->iToken++; - return SQLITE_OK; +static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ + sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); + sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); + sqlite3_step(pRtree->pWriteParent); + return sqlite3_reset(pRtree->pWriteParent); } -/* -** Set *ppModule to a pointer to the sqlite3_tokenizer_module -** structure for the unicode tokenizer. -*/ -SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const **ppModule){ - static const sqlite3_tokenizer_module module = { - 0, - unicodeCreate, - unicodeDestroy, - unicodeOpen, - unicodeClose, - unicodeNext, - 0, - }; - *ppModule = &module; -} +static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ -#endif /* ifndef SQLITE_DISABLE_FTS3_UNICODE */ -/************** End of fts3_unicode.c ****************************************/ -/************** Begin file fts3_unicode2.c ***********************************/ /* -** 2012 May 25 +** Arguments aIdx, aDistance and aSpare all point to arrays of size +** nIdx. The aIdx array contains the set of integers from 0 to +** (nIdx-1) in no particular order. This function sorts the values +** in aIdx according to the indexed values in aDistance. For +** example, assuming the inputs: ** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: +** aIdx = { 0, 1, 2, 3 } +** aDistance = { 5.0, 2.0, 7.0, 6.0 } ** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** this function sets the aIdx array to contain: ** -****************************************************************************** +** aIdx = { 0, 1, 2, 3 } +** +** The aSpare array is used as temporary working space by the +** sorting algorithm. */ +static void SortByDistance( + int *aIdx, + int nIdx, + RtreeDValue *aDistance, + int *aSpare +){ + if( nIdx>1 ){ + int iLeft = 0; + int iRight = 0; -/* -** DO NOT EDIT THIS MACHINE GENERATED FILE. -*/ + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; -#ifndef SQLITE_DISABLE_FTS3_UNICODE -#if defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) + SortByDistance(aLeft, nLeft, aDistance, aSpare); + SortByDistance(aRight, nRight, aDistance, aSpare); -/* #include <assert.h> */ + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + + while( iLeft<nLeft || iRight<nRight ){ + if( iLeft==nLeft ){ + aIdx[iLeft+iRight] = aRight[iRight]; + iRight++; + }else if( iRight==nRight ){ + aIdx[iLeft+iRight] = aLeft[iLeft]; + iLeft++; + }else{ + RtreeDValue fLeft = aDistance[aLeft[iLeft]]; + RtreeDValue fRight = aDistance[aRight[iRight]]; + if( fLeft<fRight ){ + aIdx[iLeft+iRight] = aLeft[iLeft]; + iLeft++; + }else{ + aIdx[iLeft+iRight] = aRight[iRight]; + iRight++; + } + } + } + +#if 0 + /* Check that the sort worked */ + { + int jj; + for(jj=1; jj<nIdx; jj++){ + RtreeDValue left = aDistance[aIdx[jj-1]]; + RtreeDValue right = aDistance[aIdx[jj]]; + assert( left<=right ); + } + } +#endif + } +} /* -** Return true if the argument corresponds to a unicode codepoint -** classified as either a letter or a number. Otherwise false. +** Arguments aIdx, aCell and aSpare all point to arrays of size +** nIdx. The aIdx array contains the set of integers from 0 to +** (nIdx-1) in no particular order. This function sorts the values +** in aIdx according to dimension iDim of the cells in aCell. The +** minimum value of dimension iDim is considered first, the +** maximum used to break ties. ** -** The results are undefined if the value passed to this function -** is less than zero. +** The aSpare array is used as temporary working space by the +** sorting algorithm. */ -SQLITE_PRIVATE int sqlite3FtsUnicodeIsalnum(int c){ - /* Each unsigned integer in the following array corresponds to a contiguous - ** range of unicode codepoints that are not either letters or numbers (i.e. - ** codepoints for which this function should return 0). - ** - ** The most significant 22 bits in each 32-bit value contain the first - ** codepoint in the range. The least significant 10 bits are used to store - ** the size of the range (always at least 1). In other words, the value - ** ((C<<22) + N) represents a range of N codepoints starting with codepoint - ** C. It is not possible to represent a range larger than 1023 codepoints - ** using this format. - */ - static const unsigned int aEntry[] = { - 0x00000030, 0x0000E807, 0x00016C06, 0x0001EC2F, 0x0002AC07, - 0x0002D001, 0x0002D803, 0x0002EC01, 0x0002FC01, 0x00035C01, - 0x0003DC01, 0x000B0804, 0x000B480E, 0x000B9407, 0x000BB401, - 0x000BBC81, 0x000DD401, 0x000DF801, 0x000E1002, 0x000E1C01, - 0x000FD801, 0x00120808, 0x00156806, 0x00162402, 0x00163C01, - 0x00164437, 0x0017CC02, 0x00180005, 0x00181816, 0x00187802, - 0x00192C15, 0x0019A804, 0x0019C001, 0x001B5001, 0x001B580F, - 0x001B9C07, 0x001BF402, 0x001C000E, 0x001C3C01, 0x001C4401, - 0x001CC01B, 0x001E980B, 0x001FAC09, 0x001FD804, 0x00205804, - 0x00206C09, 0x00209403, 0x0020A405, 0x0020C00F, 0x00216403, - 0x00217801, 0x0023901B, 0x00240004, 0x0024E803, 0x0024F812, - 0x00254407, 0x00258804, 0x0025C001, 0x00260403, 0x0026F001, - 0x0026F807, 0x00271C02, 0x00272C03, 0x00275C01, 0x00278802, - 0x0027C802, 0x0027E802, 0x00280403, 0x0028F001, 0x0028F805, - 0x00291C02, 0x00292C03, 0x00294401, 0x0029C002, 0x0029D401, - 0x002A0403, 0x002AF001, 0x002AF808, 0x002B1C03, 0x002B2C03, - 0x002B8802, 0x002BC002, 0x002C0403, 0x002CF001, 0x002CF807, - 0x002D1C02, 0x002D2C03, 0x002D5802, 0x002D8802, 0x002DC001, - 0x002E0801, 0x002EF805, 0x002F1803, 0x002F2804, 0x002F5C01, - 0x002FCC08, 0x00300403, 0x0030F807, 0x00311803, 0x00312804, - 0x00315402, 0x00318802, 0x0031FC01, 0x00320802, 0x0032F001, - 0x0032F807, 0x00331803, 0x00332804, 0x00335402, 0x00338802, - 0x00340802, 0x0034F807, 0x00351803, 0x00352804, 0x00355C01, - 0x00358802, 0x0035E401, 0x00360802, 0x00372801, 0x00373C06, - 0x00375801, 0x00376008, 0x0037C803, 0x0038C401, 0x0038D007, - 0x0038FC01, 0x00391C09, 0x00396802, 0x003AC401, 0x003AD006, - 0x003AEC02, 0x003B2006, 0x003C041F, 0x003CD00C, 0x003DC417, - 0x003E340B, 0x003E6424, 0x003EF80F, 0x003F380D, 0x0040AC14, - 0x00412806, 0x00415804, 0x00417803, 0x00418803, 0x00419C07, - 0x0041C404, 0x0042080C, 0x00423C01, 0x00426806, 0x0043EC01, - 0x004D740C, 0x004E400A, 0x00500001, 0x0059B402, 0x005A0001, - 0x005A6C02, 0x005BAC03, 0x005C4803, 0x005CC805, 0x005D4802, - 0x005DC802, 0x005ED023, 0x005F6004, 0x005F7401, 0x0060000F, - 0x0062A401, 0x0064800C, 0x0064C00C, 0x00650001, 0x00651002, - 0x0066C011, 0x00672002, 0x00677822, 0x00685C05, 0x00687802, - 0x0069540A, 0x0069801D, 0x0069FC01, 0x006A8007, 0x006AA006, - 0x006C0005, 0x006CD011, 0x006D6823, 0x006E0003, 0x006E840D, - 0x006F980E, 0x006FF004, 0x00709014, 0x0070EC05, 0x0071F802, - 0x00730008, 0x00734019, 0x0073B401, 0x0073C803, 0x00770027, - 0x0077F004, 0x007EF401, 0x007EFC03, 0x007F3403, 0x007F7403, - 0x007FB403, 0x007FF402, 0x00800065, 0x0081A806, 0x0081E805, - 0x00822805, 0x0082801A, 0x00834021, 0x00840002, 0x00840C04, - 0x00842002, 0x00845001, 0x00845803, 0x00847806, 0x00849401, - 0x00849C01, 0x0084A401, 0x0084B801, 0x0084E802, 0x00850005, - 0x00852804, 0x00853C01, 0x00864264, 0x00900027, 0x0091000B, - 0x0092704E, 0x00940200, 0x009C0475, 0x009E53B9, 0x00AD400A, - 0x00B39406, 0x00B3BC03, 0x00B3E404, 0x00B3F802, 0x00B5C001, - 0x00B5FC01, 0x00B7804F, 0x00B8C00C, 0x00BA001A, 0x00BA6C59, - 0x00BC00D6, 0x00BFC00C, 0x00C00005, 0x00C02019, 0x00C0A807, - 0x00C0D802, 0x00C0F403, 0x00C26404, 0x00C28001, 0x00C3EC01, - 0x00C64002, 0x00C6580A, 0x00C70024, 0x00C8001F, 0x00C8A81E, - 0x00C94001, 0x00C98020, 0x00CA2827, 0x00CB003F, 0x00CC0100, - 0x01370040, 0x02924037, 0x0293F802, 0x02983403, 0x0299BC10, - 0x029A7C01, 0x029BC008, 0x029C0017, 0x029C8002, 0x029E2402, - 0x02A00801, 0x02A01801, 0x02A02C01, 0x02A08C09, 0x02A0D804, - 0x02A1D004, 0x02A20002, 0x02A2D011, 0x02A33802, 0x02A38012, - 0x02A3E003, 0x02A4980A, 0x02A51C0D, 0x02A57C01, 0x02A60004, - 0x02A6CC1B, 0x02A77802, 0x02A8A40E, 0x02A90C01, 0x02A93002, - 0x02A97004, 0x02A9DC03, 0x02A9EC01, 0x02AAC001, 0x02AAC803, - 0x02AADC02, 0x02AAF802, 0x02AB0401, 0x02AB7802, 0x02ABAC07, - 0x02ABD402, 0x02AF8C0B, 0x03600001, 0x036DFC02, 0x036FFC02, - 0x037FFC01, 0x03EC7801, 0x03ECA401, 0x03EEC810, 0x03F4F802, - 0x03F7F002, 0x03F8001A, 0x03F88007, 0x03F8C023, 0x03F95013, - 0x03F9A004, 0x03FBFC01, 0x03FC040F, 0x03FC6807, 0x03FCEC06, - 0x03FD6C0B, 0x03FF8007, 0x03FFA007, 0x03FFE405, 0x04040003, - 0x0404DC09, 0x0405E411, 0x0406400C, 0x0407402E, 0x040E7C01, - 0x040F4001, 0x04215C01, 0x04247C01, 0x0424FC01, 0x04280403, - 0x04281402, 0x04283004, 0x0428E003, 0x0428FC01, 0x04294009, - 0x0429FC01, 0x042CE407, 0x04400003, 0x0440E016, 0x04420003, - 0x0442C012, 0x04440003, 0x04449C0E, 0x04450004, 0x04460003, - 0x0446CC0E, 0x04471404, 0x045AAC0D, 0x0491C004, 0x05BD442E, - 0x05BE3C04, 0x074000F6, 0x07440027, 0x0744A4B5, 0x07480046, - 0x074C0057, 0x075B0401, 0x075B6C01, 0x075BEC01, 0x075C5401, - 0x075CD401, 0x075D3C01, 0x075DBC01, 0x075E2401, 0x075EA401, - 0x075F0C01, 0x07BBC002, 0x07C0002C, 0x07C0C064, 0x07C2800F, - 0x07C2C40E, 0x07C3040F, 0x07C3440F, 0x07C4401F, 0x07C4C03C, - 0x07C5C02B, 0x07C7981D, 0x07C8402B, 0x07C90009, 0x07C94002, - 0x07CC0021, 0x07CCC006, 0x07CCDC46, 0x07CE0014, 0x07CE8025, - 0x07CF1805, 0x07CF8011, 0x07D0003F, 0x07D10001, 0x07D108B6, - 0x07D3E404, 0x07D4003E, 0x07D50004, 0x07D54018, 0x07D7EC46, - 0x07D9140B, 0x07DA0046, 0x07DC0074, 0x38000401, 0x38008060, - 0x380400F0, - }; - static const unsigned int aAscii[4] = { - 0xFFFFFFFF, 0xFC00FFFF, 0xF8000001, 0xF8000001, - }; +static void SortByDimension( + Rtree *pRtree, + int *aIdx, + int nIdx, + int iDim, + RtreeCell *aCell, + int *aSpare +){ + if( nIdx>1 ){ - if( c<128 ){ - return ( (aAscii[c >> 5] & (1 << (c & 0x001F)))==0 ); - }else if( c<(1<<22) ){ - unsigned int key = (((unsigned int)c)<<10) | 0x000003FF; - int iRes = 0; - int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; - int iLo = 0; - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - if( key >= aEntry[iTest] ){ - iRes = iTest; - iLo = iTest+1; + int iLeft = 0; + int iRight = 0; + + int nLeft = nIdx/2; + int nRight = nIdx-nLeft; + int *aLeft = aIdx; + int *aRight = &aIdx[nLeft]; + + SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); + SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); + + memcpy(aSpare, aLeft, sizeof(int)*nLeft); + aLeft = aSpare; + while( iLeft<nLeft || iRight<nRight ){ + RtreeDValue xleft1 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2]); + RtreeDValue xleft2 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2+1]); + RtreeDValue xright1 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2]); + RtreeDValue xright2 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2+1]); + if( (iLeft!=nLeft) && ((iRight==nRight) + || (xleft1<xright1) + || (xleft1==xright1 && xleft2<xright2) + )){ + aIdx[iLeft+iRight] = aLeft[iLeft]; + iLeft++; }else{ - iHi = iTest-1; + aIdx[iLeft+iRight] = aRight[iRight]; + iRight++; } } - assert( aEntry[0]<key ); - assert( key>=aEntry[iRes] ); - return (((unsigned int)c) >= ((aEntry[iRes]>>10) + (aEntry[iRes]&0x3FF))); + +#if 0 + /* Check that the sort worked */ + { + int jj; + for(jj=1; jj<nIdx; jj++){ + RtreeDValue xleft1 = aCell[aIdx[jj-1]].aCoord[iDim*2]; + RtreeDValue xleft2 = aCell[aIdx[jj-1]].aCoord[iDim*2+1]; + RtreeDValue xright1 = aCell[aIdx[jj]].aCoord[iDim*2]; + RtreeDValue xright2 = aCell[aIdx[jj]].aCoord[iDim*2+1]; + assert( xleft1<=xright1 && (xleft1<xright1 || xleft2<=xright2) ); + } + } +#endif } - return 1; } - /* -** If the argument is a codepoint corresponding to a lowercase letter -** in the ASCII range with a diacritic added, return the codepoint -** of the ASCII letter only. For example, if passed 235 - "LATIN -** SMALL LETTER E WITH DIAERESIS" - return 65 ("LATIN SMALL LETTER -** E"). The resuls of passing a codepoint that corresponds to an -** uppercase letter are undefined. +** Implementation of the R*-tree variant of SplitNode from Beckman[1990]. */ -static int remove_diacritic(int c){ - unsigned short aDia[] = { - 0, 1797, 1848, 1859, 1891, 1928, 1940, 1995, - 2024, 2040, 2060, 2110, 2168, 2206, 2264, 2286, - 2344, 2383, 2472, 2488, 2516, 2596, 2668, 2732, - 2782, 2842, 2894, 2954, 2984, 3000, 3028, 3336, - 3456, 3696, 3712, 3728, 3744, 3896, 3912, 3928, - 3968, 4008, 4040, 4106, 4138, 4170, 4202, 4234, - 4266, 4296, 4312, 4344, 4408, 4424, 4472, 4504, - 6148, 6198, 6264, 6280, 6360, 6429, 6505, 6529, - 61448, 61468, 61534, 61592, 61642, 61688, 61704, 61726, - 61784, 61800, 61836, 61880, 61914, 61948, 61998, 62122, - 62154, 62200, 62218, 62302, 62364, 62442, 62478, 62536, - 62554, 62584, 62604, 62640, 62648, 62656, 62664, 62730, - 62924, 63050, 63082, 63274, 63390, - }; - char aChar[] = { - '\0', 'a', 'c', 'e', 'i', 'n', 'o', 'u', 'y', 'y', 'a', 'c', - 'd', 'e', 'e', 'g', 'h', 'i', 'j', 'k', 'l', 'n', 'o', 'r', - 's', 't', 'u', 'u', 'w', 'y', 'z', 'o', 'u', 'a', 'i', 'o', - 'u', 'g', 'k', 'o', 'j', 'g', 'n', 'a', 'e', 'i', 'o', 'r', - 'u', 's', 't', 'h', 'a', 'e', 'o', 'y', '\0', '\0', '\0', '\0', - '\0', '\0', '\0', '\0', 'a', 'b', 'd', 'd', 'e', 'f', 'g', 'h', - 'h', 'i', 'k', 'l', 'l', 'm', 'n', 'p', 'r', 'r', 's', 't', - 'u', 'v', 'w', 'w', 'x', 'y', 'z', 'h', 't', 'w', 'y', 'a', - 'e', 'i', 'o', 'u', 'y', - }; +static int splitNodeStartree( + Rtree *pRtree, + RtreeCell *aCell, + int nCell, + RtreeNode *pLeft, + RtreeNode *pRight, + RtreeCell *pBboxLeft, + RtreeCell *pBboxRight +){ + int **aaSorted; + int *aSpare; + int ii; - unsigned int key = (((unsigned int)c)<<3) | 0x00000007; - int iRes = 0; - int iHi = sizeof(aDia)/sizeof(aDia[0]) - 1; - int iLo = 0; - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - if( key >= aDia[iTest] ){ - iRes = iTest; - iLo = iTest+1; - }else{ - iHi = iTest-1; + int iBestDim = 0; + int iBestSplit = 0; + RtreeDValue fBestMargin = RTREE_ZERO; + + int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); + + aaSorted = (int **)sqlite3_malloc(nByte); + if( !aaSorted ){ + return SQLITE_NOMEM; + } + + aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; + memset(aaSorted, 0, nByte); + for(ii=0; ii<pRtree->nDim; ii++){ + int jj; + aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; + for(jj=0; jj<nCell; jj++){ + aaSorted[ii][jj] = jj; } + SortByDimension(pRtree, aaSorted[ii], nCell, ii, aCell, aSpare); } - assert( key>=aDia[iRes] ); - return ((c > (aDia[iRes]>>3) + (aDia[iRes]&0x07)) ? c : (int)aChar[iRes]); + + for(ii=0; ii<pRtree->nDim; ii++){ + RtreeDValue margin = RTREE_ZERO; + RtreeDValue fBestOverlap = RTREE_ZERO; + RtreeDValue fBestArea = RTREE_ZERO; + int iBestLeft = 0; + int nLeft; + + for( + nLeft=RTREE_MINCELLS(pRtree); + nLeft<=(nCell-RTREE_MINCELLS(pRtree)); + nLeft++ + ){ + RtreeCell left; + RtreeCell right; + int kk; + RtreeDValue overlap; + RtreeDValue area; + + memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); + memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); + for(kk=1; kk<(nCell-1); kk++){ + if( kk<nLeft ){ + cellUnion(pRtree, &left, &aCell[aaSorted[ii][kk]]); + }else{ + cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]); + } + } + margin += cellMargin(pRtree, &left); + margin += cellMargin(pRtree, &right); + overlap = cellOverlap(pRtree, &left, &right, 1); + area = cellArea(pRtree, &left) + cellArea(pRtree, &right); + if( (nLeft==RTREE_MINCELLS(pRtree)) + || (overlap<fBestOverlap) + || (overlap==fBestOverlap && area<fBestArea) + ){ + iBestLeft = nLeft; + fBestOverlap = overlap; + fBestArea = area; + } + } + + if( ii==0 || margin<fBestMargin ){ + iBestDim = ii; + fBestMargin = margin; + iBestSplit = iBestLeft; + } + } + + memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell)); + memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell)); + for(ii=0; ii<nCell; ii++){ + RtreeNode *pTarget = (ii<iBestSplit)?pLeft:pRight; + RtreeCell *pBbox = (ii<iBestSplit)?pBboxLeft:pBboxRight; + RtreeCell *pCell = &aCell[aaSorted[iBestDim][ii]]; + nodeInsertCell(pRtree, pTarget, pCell); + cellUnion(pRtree, pBbox, pCell); + } + + sqlite3_free(aaSorted); + return SQLITE_OK; } -/* -** Return true if the argument interpreted as a unicode codepoint -** is a diacritical modifier character. -*/ -SQLITE_PRIVATE int sqlite3FtsUnicodeIsdiacritic(int c){ - unsigned int mask0 = 0x08029FDF; - unsigned int mask1 = 0x000361F8; - if( c<768 || c>817 ) return 0; - return (c < 768+32) ? - (mask0 & (1 << (c-768))) : - (mask1 & (1 << (c-768-32))); +static int updateMapping( + Rtree *pRtree, + i64 iRowid, + RtreeNode *pNode, + int iHeight +){ + int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); + xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); + if( iHeight>0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + return xSetMapping(pRtree, iRowid, pNode->iNode); } +static int SplitNode( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int i; + int newCellIsRight = 0; -/* -** Interpret the argument as a unicode codepoint. If the codepoint -** is an upper case character that has a lower case equivalent, -** return the codepoint corresponding to the lower case version. -** Otherwise, return a copy of the argument. -** -** The results are undefined if the value passed to this function -** is less than zero. -*/ -SQLITE_PRIVATE int sqlite3FtsUnicodeFold(int c, int bRemoveDiacritic){ - /* Each entry in the following array defines a rule for folding a range - ** of codepoints to lower case. The rule applies to a range of nRange - ** codepoints starting at codepoint iCode. - ** - ** If the least significant bit in flags is clear, then the rule applies - ** to all nRange codepoints (i.e. all nRange codepoints are upper case and - ** need to be folded). Or, if it is set, then the rule only applies to - ** every second codepoint in the range, starting with codepoint C. - ** - ** The 7 most significant bits in flags are an index into the aiOff[] - ** array. If a specific codepoint C does require folding, then its lower - ** case equivalent is ((C + aiOff[flags>>1]) & 0xFFFF). - ** - ** The contents of this array are generated by parsing the CaseFolding.txt - ** file distributed as part of the "Unicode Character Database". See - ** http://www.unicode.org for details. + int rc = SQLITE_OK; + int nCell = NCELL(pNode); + RtreeCell *aCell; + int *aiUsed; + + RtreeNode *pLeft = 0; + RtreeNode *pRight = 0; + + RtreeCell leftbbox; + RtreeCell rightbbox; + + /* Allocate an array and populate it with a copy of pCell and + ** all cells from node pLeft. Then zero the original node. */ - static const struct TableEntry { - unsigned short iCode; - unsigned char flags; - unsigned char nRange; - } aEntry[] = { - {65, 14, 26}, {181, 64, 1}, {192, 14, 23}, - {216, 14, 7}, {256, 1, 48}, {306, 1, 6}, - {313, 1, 16}, {330, 1, 46}, {376, 116, 1}, - {377, 1, 6}, {383, 104, 1}, {385, 50, 1}, - {386, 1, 4}, {390, 44, 1}, {391, 0, 1}, - {393, 42, 2}, {395, 0, 1}, {398, 32, 1}, - {399, 38, 1}, {400, 40, 1}, {401, 0, 1}, - {403, 42, 1}, {404, 46, 1}, {406, 52, 1}, - {407, 48, 1}, {408, 0, 1}, {412, 52, 1}, - {413, 54, 1}, {415, 56, 1}, {416, 1, 6}, - {422, 60, 1}, {423, 0, 1}, {425, 60, 1}, - {428, 0, 1}, {430, 60, 1}, {431, 0, 1}, - {433, 58, 2}, {435, 1, 4}, {439, 62, 1}, - {440, 0, 1}, {444, 0, 1}, {452, 2, 1}, - {453, 0, 1}, {455, 2, 1}, {456, 0, 1}, - {458, 2, 1}, {459, 1, 18}, {478, 1, 18}, - {497, 2, 1}, {498, 1, 4}, {502, 122, 1}, - {503, 134, 1}, {504, 1, 40}, {544, 110, 1}, - {546, 1, 18}, {570, 70, 1}, {571, 0, 1}, - {573, 108, 1}, {574, 68, 1}, {577, 0, 1}, - {579, 106, 1}, {580, 28, 1}, {581, 30, 1}, - {582, 1, 10}, {837, 36, 1}, {880, 1, 4}, - {886, 0, 1}, {902, 18, 1}, {904, 16, 3}, - {908, 26, 1}, {910, 24, 2}, {913, 14, 17}, - {931, 14, 9}, {962, 0, 1}, {975, 4, 1}, - {976, 140, 1}, {977, 142, 1}, {981, 146, 1}, - {982, 144, 1}, {984, 1, 24}, {1008, 136, 1}, - {1009, 138, 1}, {1012, 130, 1}, {1013, 128, 1}, - {1015, 0, 1}, {1017, 152, 1}, {1018, 0, 1}, - {1021, 110, 3}, {1024, 34, 16}, {1040, 14, 32}, - {1120, 1, 34}, {1162, 1, 54}, {1216, 6, 1}, - {1217, 1, 14}, {1232, 1, 88}, {1329, 22, 38}, - {4256, 66, 38}, {4295, 66, 1}, {4301, 66, 1}, - {7680, 1, 150}, {7835, 132, 1}, {7838, 96, 1}, - {7840, 1, 96}, {7944, 150, 8}, {7960, 150, 6}, - {7976, 150, 8}, {7992, 150, 8}, {8008, 150, 6}, - {8025, 151, 8}, {8040, 150, 8}, {8072, 150, 8}, - {8088, 150, 8}, {8104, 150, 8}, {8120, 150, 2}, - {8122, 126, 2}, {8124, 148, 1}, {8126, 100, 1}, - {8136, 124, 4}, {8140, 148, 1}, {8152, 150, 2}, - {8154, 120, 2}, {8168, 150, 2}, {8170, 118, 2}, - {8172, 152, 1}, {8184, 112, 2}, {8186, 114, 2}, - {8188, 148, 1}, {8486, 98, 1}, {8490, 92, 1}, - {8491, 94, 1}, {8498, 12, 1}, {8544, 8, 16}, - {8579, 0, 1}, {9398, 10, 26}, {11264, 22, 47}, - {11360, 0, 1}, {11362, 88, 1}, {11363, 102, 1}, - {11364, 90, 1}, {11367, 1, 6}, {11373, 84, 1}, - {11374, 86, 1}, {11375, 80, 1}, {11376, 82, 1}, - {11378, 0, 1}, {11381, 0, 1}, {11390, 78, 2}, - {11392, 1, 100}, {11499, 1, 4}, {11506, 0, 1}, - {42560, 1, 46}, {42624, 1, 24}, {42786, 1, 14}, - {42802, 1, 62}, {42873, 1, 4}, {42877, 76, 1}, - {42878, 1, 10}, {42891, 0, 1}, {42893, 74, 1}, - {42896, 1, 4}, {42912, 1, 10}, {42922, 72, 1}, - {65313, 14, 26}, - }; - static const unsigned short aiOff[] = { - 1, 2, 8, 15, 16, 26, 28, 32, - 37, 38, 40, 48, 63, 64, 69, 71, - 79, 80, 116, 202, 203, 205, 206, 207, - 209, 210, 211, 213, 214, 217, 218, 219, - 775, 7264, 10792, 10795, 23228, 23256, 30204, 54721, - 54753, 54754, 54756, 54787, 54793, 54809, 57153, 57274, - 57921, 58019, 58363, 61722, 65268, 65341, 65373, 65406, - 65408, 65410, 65415, 65424, 65436, 65439, 65450, 65462, - 65472, 65476, 65478, 65480, 65482, 65488, 65506, 65511, - 65514, 65521, 65527, 65528, 65529, - }; + aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); + if( !aCell ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } + aiUsed = (int *)&aCell[nCell+1]; + memset(aiUsed, 0, sizeof(int)*(nCell+1)); + for(i=0; i<nCell; i++){ + nodeGetCell(pRtree, pNode, i, &aCell[i]); + } + nodeZero(pRtree, pNode); + memcpy(&aCell[nCell], pCell, sizeof(RtreeCell)); + nCell++; + + if( pNode->iNode==1 ){ + pRight = nodeNew(pRtree, pNode); + pLeft = nodeNew(pRtree, pNode); + pRtree->iDepth++; + pNode->isDirty = 1; + writeInt16(pNode->zData, pRtree->iDepth); + }else{ + pLeft = pNode; + pRight = nodeNew(pRtree, pLeft->pParent); + nodeReference(pLeft); + } - int ret = c; + if( !pLeft || !pRight ){ + rc = SQLITE_NOMEM; + goto splitnode_out; + } - assert( c>=0 ); - assert( sizeof(unsigned short)==2 && sizeof(unsigned char)==1 ); + memset(pLeft->zData, 0, pRtree->iNodeSize); + memset(pRight->zData, 0, pRtree->iNodeSize); - if( c<128 ){ - if( c>='A' && c<='Z' ) ret = c + ('a' - 'A'); - }else if( c<65536 ){ - int iHi = sizeof(aEntry)/sizeof(aEntry[0]) - 1; - int iLo = 0; - int iRes = -1; + rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, + &leftbbox, &rightbbox); + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } - while( iHi>=iLo ){ - int iTest = (iHi + iLo) / 2; - int cmp = (c - aEntry[iTest].iCode); - if( cmp>=0 ){ - iRes = iTest; - iLo = iTest+1; - }else{ - iHi = iTest-1; - } + /* Ensure both child nodes have node numbers assigned to them by calling + ** nodeWrite(). Node pRight always needs a node number, as it was created + ** by nodeNew() above. But node pLeft sometimes already has a node number. + ** In this case avoid the all to nodeWrite(). + */ + if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) + || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) + ){ + goto splitnode_out; + } + + rightbbox.iRowid = pRight->iNode; + leftbbox.iRowid = pLeft->iNode; + + if( pNode->iNode==1 ){ + rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); + if( rc!=SQLITE_OK ){ + goto splitnode_out; } - assert( iRes<0 || c>=aEntry[iRes].iCode ); + }else{ + RtreeNode *pParent = pLeft->pParent; + int iCell; + rc = nodeParentIndex(pRtree, pLeft, &iCell); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); + rc = AdjustTree(pRtree, pParent, &leftbbox); + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + } + if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ + goto splitnode_out; + } - if( iRes>=0 ){ - const struct TableEntry *p = &aEntry[iRes]; - if( c<(p->iCode + p->nRange) && 0==(0x01 & p->flags & (p->iCode ^ c)) ){ - ret = (c + (aiOff[p->flags>>1])) & 0x0000FFFF; - assert( ret>0 ); + for(i=0; i<NCELL(pRight); i++){ + i64 iRowid = nodeGetRowid(pRtree, pRight, i); + rc = updateMapping(pRtree, iRowid, pRight, iHeight); + if( iRowid==pCell->iRowid ){ + newCellIsRight = 1; + } + if( rc!=SQLITE_OK ){ + goto splitnode_out; + } + } + if( pNode->iNode==1 ){ + for(i=0; i<NCELL(pLeft); i++){ + i64 iRowid = nodeGetRowid(pRtree, pLeft, i); + rc = updateMapping(pRtree, iRowid, pLeft, iHeight); + if( rc!=SQLITE_OK ){ + goto splitnode_out; } } + }else if( newCellIsRight==0 ){ + rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight); + } - if( bRemoveDiacritic ) ret = remove_diacritic(ret); + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRight); + pRight = 0; } - - else if( c>=66560 && c<66600 ){ - ret = c + 40; + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pLeft); + pLeft = 0; } - return ret; +splitnode_out: + nodeRelease(pRtree, pRight); + nodeRelease(pRtree, pLeft); + sqlite3_free(aCell); + return rc; } -#endif /* defined(SQLITE_ENABLE_FTS3) || defined(SQLITE_ENABLE_FTS4) */ -#endif /* !defined(SQLITE_DISABLE_FTS3_UNICODE) */ -/************** End of fts3_unicode2.c ***************************************/ -/************** Begin file rtree.c *******************************************/ /* -** 2001 September 15 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. +** If node pLeaf is not the root of the r-tree and its pParent pointer is +** still NULL, load all ancestor nodes of pLeaf into memory and populate +** the pLeaf->pParent chain all the way up to the root node. ** -************************************************************************* -** This file contains code for implementations of the r-tree and r*-tree -** algorithms packaged as an SQLite virtual table module. +** This operation is required when a row is deleted (or updated - an update +** is implemented as a delete followed by an insert). SQLite provides the +** rowid of the row to delete, which can be used to find the leaf on which +** the entry resides (argument pLeaf). Once the leaf is located, this +** function is called to determine its ancestry. */ +static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ + int rc = SQLITE_OK; + RtreeNode *pChild = pLeaf; + while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ + int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ + sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); + rc = sqlite3_step(pRtree->pReadParent); + if( rc==SQLITE_ROW ){ + RtreeNode *pTest; /* Used to test for reference loops */ + i64 iNode; /* Node number of parent node */ -/* -** Database Format of R-Tree Tables -** -------------------------------- -** -** The data structure for a single virtual r-tree table is stored in three -** native SQLite tables declared as follows. In each case, the '%' character -** in the table name is replaced with the user-supplied name of the r-tree -** table. -** -** CREATE TABLE %_node(nodeno INTEGER PRIMARY KEY, data BLOB) -** CREATE TABLE %_parent(nodeno INTEGER PRIMARY KEY, parentnode INTEGER) -** CREATE TABLE %_rowid(rowid INTEGER PRIMARY KEY, nodeno INTEGER) -** -** The data for each node of the r-tree structure is stored in the %_node -** table. For each node that is not the root node of the r-tree, there is -** an entry in the %_parent table associating the node with its parent. -** And for each row of data in the table, there is an entry in the %_rowid -** table that maps from the entries rowid to the id of the node that it -** is stored on. -** -** The root node of an r-tree always exists, even if the r-tree table is -** empty. The nodeno of the root node is always 1. All other nodes in the -** table must be the same size as the root node. The content of each node -** is formatted as follows: -** -** 1. If the node is the root node (node 1), then the first 2 bytes -** of the node contain the tree depth as a big-endian integer. -** For non-root nodes, the first 2 bytes are left unused. -** -** 2. The next 2 bytes contain the number of entries currently -** stored in the node. -** -** 3. The remainder of the node contains the node entries. Each entry -** consists of a single 8-byte integer followed by an even number -** of 4-byte coordinates. For leaf nodes the integer is the rowid -** of a record. For internal nodes it is the node number of a -** child page. -*/ + /* Before setting pChild->pParent, test that we are not creating a + ** loop of references (as we would if, say, pChild==pParent). We don't + ** want to do this as it leads to a memory leak when trying to delete + ** the referenced counted node structures. + */ + iNode = sqlite3_column_int64(pRtree->pReadParent, 0); + for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); + if( !pTest ){ + rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); + } + } + rc = sqlite3_reset(pRtree->pReadParent); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; + pChild = pChild->pParent; + } + return rc; +} -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RTREE) +static int deleteCell(Rtree *, RtreeNode *, int, int); -#ifndef SQLITE_CORE - SQLITE_EXTENSION_INIT1 -#else -#endif +static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ + int rc; + int rc2; + RtreeNode *pParent = 0; + int iCell; -/* #include <string.h> */ -/* #include <assert.h> */ -/* #include <stdio.h> */ + assert( pNode->nRef==1 ); -#ifndef SQLITE_AMALGAMATION -#include "sqlite3rtree.h" -typedef sqlite3_int64 i64; -typedef unsigned char u8; -typedef unsigned short u16; -typedef unsigned int u32; -#endif + /* Remove the entry in the parent cell. */ + rc = nodeParentIndex(pRtree, pNode, &iCell); + if( rc==SQLITE_OK ){ + pParent = pNode->pParent; + pNode->pParent = 0; + rc = deleteCell(pRtree, pParent, iCell, iHeight+1); + } + rc2 = nodeRelease(pRtree, pParent); + if( rc==SQLITE_OK ){ + rc = rc2; + } + if( rc!=SQLITE_OK ){ + return rc; + } -/* The following macro is used to suppress compiler warnings. -*/ -#ifndef UNUSED_PARAMETER -# define UNUSED_PARAMETER(x) (void)(x) -#endif + /* Remove the xxx_node entry. */ + sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteNode); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ + return rc; + } -typedef struct Rtree Rtree; -typedef struct RtreeCursor RtreeCursor; -typedef struct RtreeNode RtreeNode; -typedef struct RtreeCell RtreeCell; -typedef struct RtreeConstraint RtreeConstraint; -typedef struct RtreeMatchArg RtreeMatchArg; -typedef struct RtreeGeomCallback RtreeGeomCallback; -typedef union RtreeCoord RtreeCoord; -typedef struct RtreeSearchPoint RtreeSearchPoint; + /* Remove the xxx_parent entry. */ + sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); + sqlite3_step(pRtree->pDeleteParent); + if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ + return rc; + } + + /* Remove the node from the in-memory hash table and link it into + ** the Rtree.pDeleted list. Its contents will be re-inserted later on. + */ + nodeHashDelete(pRtree, pNode); + pNode->iNode = iHeight; + pNode->pNext = pRtree->pDeleted; + pNode->nRef++; + pRtree->pDeleted = pNode; -/* The rtree may have between 1 and RTREE_MAX_DIMENSIONS dimensions. */ -#define RTREE_MAX_DIMENSIONS 5 + return SQLITE_OK; +} -/* Size of hash table Rtree.aHash. This hash table is not expected to -** ever contain very many entries, so a fixed number of buckets is -** used. -*/ -#define HASHSIZE 97 +static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ + RtreeNode *pParent = pNode->pParent; + int rc = SQLITE_OK; + if( pParent ){ + int ii; + int nCell = NCELL(pNode); + RtreeCell box; /* Bounding box for pNode */ + nodeGetCell(pRtree, pNode, 0, &box); + for(ii=1; ii<nCell; ii++){ + RtreeCell cell; + nodeGetCell(pRtree, pNode, ii, &cell); + cellUnion(pRtree, &box, &cell); + } + box.iRowid = pNode->iNode; + rc = nodeParentIndex(pRtree, pNode, &ii); + if( rc==SQLITE_OK ){ + nodeOverwriteCell(pRtree, pParent, &box, ii); + rc = fixBoundingBox(pRtree, pParent); + } + } + return rc; +} -/* The xBestIndex method of this virtual table requires an estimate of -** the number of rows in the virtual table to calculate the costs of -** various strategies. If possible, this estimate is loaded from the -** sqlite_stat1 table (with RTREE_MIN_ROWEST as a hard-coded minimum). -** Otherwise, if no sqlite_stat1 entry is available, use -** RTREE_DEFAULT_ROWEST. +/* +** Delete the cell at index iCell of node pNode. After removing the +** cell, adjust the r-tree data structure if required. */ -#define RTREE_DEFAULT_ROWEST 1048576 -#define RTREE_MIN_ROWEST 100 +static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ + RtreeNode *pParent; + int rc; -/* -** An rtree virtual-table object. -*/ -struct Rtree { - sqlite3_vtab base; /* Base class. Must be first */ - sqlite3 *db; /* Host database connection */ - int iNodeSize; /* Size in bytes of each node in the node table */ - u8 nDim; /* Number of dimensions */ - u8 eCoordType; /* RTREE_COORD_REAL32 or RTREE_COORD_INT32 */ - u8 nBytesPerCell; /* Bytes consumed per cell */ - int iDepth; /* Current depth of the r-tree structure */ - char *zDb; /* Name of database containing r-tree table */ - char *zName; /* Name of r-tree table */ - int nBusy; /* Current number of users of this structure */ - i64 nRowEst; /* Estimated number of rows in this table */ + if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ + return rc; + } - /* List of nodes removed during a CondenseTree operation. List is - ** linked together via the pointer normally used for hash chains - - ** RtreeNode.pNext. RtreeNode.iNode stores the depth of the sub-tree - ** headed by the node (leaf nodes have RtreeNode.iNode==0). + /* Remove the cell from the node. This call just moves bytes around + ** the in-memory node image, so it cannot fail. */ - RtreeNode *pDeleted; - int iReinsertHeight; /* Height of sub-trees Reinsert() has run on */ + nodeDeleteCell(pRtree, pNode, iCell); - /* Statements to read/write/delete a record from xxx_node */ - sqlite3_stmt *pReadNode; - sqlite3_stmt *pWriteNode; - sqlite3_stmt *pDeleteNode; + /* If the node is not the tree root and now has less than the minimum + ** number of cells, remove it from the tree. Otherwise, update the + ** cell in the parent node so that it tightly contains the updated + ** node. + */ + pParent = pNode->pParent; + assert( pParent || pNode->iNode==1 ); + if( pParent ){ + if( NCELL(pNode)<RTREE_MINCELLS(pRtree) ){ + rc = removeNode(pRtree, pNode, iHeight); + }else{ + rc = fixBoundingBox(pRtree, pNode); + } + } - /* Statements to read/write/delete a record from xxx_rowid */ - sqlite3_stmt *pReadRowid; - sqlite3_stmt *pWriteRowid; - sqlite3_stmt *pDeleteRowid; + return rc; +} - /* Statements to read/write/delete a record from xxx_parent */ - sqlite3_stmt *pReadParent; - sqlite3_stmt *pWriteParent; - sqlite3_stmt *pDeleteParent; +static int Reinsert( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int *aOrder; + int *aSpare; + RtreeCell *aCell; + RtreeDValue *aDistance; + int nCell; + RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS]; + int iDim; + int ii; + int rc = SQLITE_OK; + int n; - RtreeNode *aHash[HASHSIZE]; /* Hash table of in-memory nodes. */ -}; + memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS); -/* Possible values for Rtree.eCoordType: */ -#define RTREE_COORD_REAL32 0 -#define RTREE_COORD_INT32 1 + nCell = NCELL(pNode)+1; + n = (nCell+1)&(~1); -/* -** If SQLITE_RTREE_INT_ONLY is defined, then this virtual table will -** only deal with integer coordinates. No floating point operations -** will be done. -*/ -#ifdef SQLITE_RTREE_INT_ONLY - typedef sqlite3_int64 RtreeDValue; /* High accuracy coordinate */ - typedef int RtreeValue; /* Low accuracy coordinate */ -# define RTREE_ZERO 0 -#else - typedef double RtreeDValue; /* High accuracy coordinate */ - typedef float RtreeValue; /* Low accuracy coordinate */ -# define RTREE_ZERO 0.0 -#endif + /* Allocate the buffers used by this operation. The allocation is + ** relinquished before this function returns. + */ + aCell = (RtreeCell *)sqlite3_malloc(n * ( + sizeof(RtreeCell) + /* aCell array */ + sizeof(int) + /* aOrder array */ + sizeof(int) + /* aSpare array */ + sizeof(RtreeDValue) /* aDistance array */ + )); + if( !aCell ){ + return SQLITE_NOMEM; + } + aOrder = (int *)&aCell[n]; + aSpare = (int *)&aOrder[n]; + aDistance = (RtreeDValue *)&aSpare[n]; -/* -** When doing a search of an r-tree, instances of the following structure -** record intermediate results from the tree walk. -** -** The id is always a node-id. For iLevel>=1 the id is the node-id of -** the node that the RtreeSearchPoint represents. When iLevel==0, however, -** the id is of the parent node and the cell that RtreeSearchPoint -** represents is the iCell-th entry in the parent node. -*/ -struct RtreeSearchPoint { - RtreeDValue rScore; /* The score for this node. Smallest goes first. */ - sqlite3_int64 id; /* Node ID */ - u8 iLevel; /* 0=entries. 1=leaf node. 2+ for higher */ - u8 eWithin; /* PARTLY_WITHIN or FULLY_WITHIN */ - u8 iCell; /* Cell index within the node */ -}; + for(ii=0; ii<nCell; ii++){ + if( ii==(nCell-1) ){ + memcpy(&aCell[ii], pCell, sizeof(RtreeCell)); + }else{ + nodeGetCell(pRtree, pNode, ii, &aCell[ii]); + } + aOrder[ii] = ii; + for(iDim=0; iDim<pRtree->nDim; iDim++){ + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); + aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); + } + } + for(iDim=0; iDim<pRtree->nDim; iDim++){ + aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); + } -/* -** The minimum number of cells allowed for a node is a third of the -** maximum. In Gutman's notation: -** -** m = M/3 -** -** If an R*-tree "Reinsert" operation is required, the same number of -** cells are removed from the overfull node and reinserted into the tree. -*/ -#define RTREE_MINCELLS(p) ((((p)->iNodeSize-4)/(p)->nBytesPerCell)/3) -#define RTREE_REINSERT(p) RTREE_MINCELLS(p) -#define RTREE_MAXCELLS 51 + for(ii=0; ii<nCell; ii++){ + aDistance[ii] = RTREE_ZERO; + for(iDim=0; iDim<pRtree->nDim; iDim++){ + RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - + DCOORD(aCell[ii].aCoord[iDim*2])); + aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); + } + } -/* -** The smallest possible node-size is (512-64)==448 bytes. And the largest -** supported cell size is 48 bytes (8 byte rowid + ten 4 byte coordinates). -** Therefore all non-root nodes must contain at least 3 entries. Since -** 2^40 is greater than 2^64, an r-tree structure always has a depth of -** 40 or less. -*/ -#define RTREE_MAX_DEPTH 40 + SortByDistance(aOrder, nCell, aDistance, aSpare); + nodeZero(pRtree, pNode); + + for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ + RtreeCell *p = &aCell[aOrder[ii]]; + nodeInsertCell(pRtree, pNode, p); + if( p->iRowid==pCell->iRowid ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, p->iRowid, pNode->iNode); + } + } + } + if( rc==SQLITE_OK ){ + rc = fixBoundingBox(pRtree, pNode); + } + for(; rc==SQLITE_OK && ii<nCell; ii++){ + /* Find a node to store this cell in. pNode->iNode currently contains + ** the height of the sub-tree headed by the cell. + */ + RtreeNode *pInsert; + RtreeCell *p = &aCell[aOrder[ii]]; + rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + sqlite3_free(aCell); + return rc; +} /* -** Number of entries in the cursor RtreeNode cache. The first entry is -** used to cache the RtreeNode for RtreeCursor.sPoint. The remaining -** entries cache the RtreeNode for the first elements of the priority queue. +** Insert cell pCell into node pNode. Node pNode is the head of a +** subtree iHeight high (leaf nodes have iHeight==0). */ -#define RTREE_CACHE_SZ 5 +static int rtreeInsertCell( + Rtree *pRtree, + RtreeNode *pNode, + RtreeCell *pCell, + int iHeight +){ + int rc = SQLITE_OK; + if( iHeight>0 ){ + RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); + if( pChild ){ + nodeRelease(pRtree, pChild->pParent); + nodeReference(pNode); + pChild->pParent = pNode; + } + } + if( nodeInsertCell(pRtree, pNode, pCell) ){ + if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ + rc = SplitNode(pRtree, pNode, pCell, iHeight); + }else{ + pRtree->iReinsertHeight = iHeight; + rc = Reinsert(pRtree, pNode, pCell, iHeight); + } + }else{ + rc = AdjustTree(pRtree, pNode, pCell); + if( rc==SQLITE_OK ){ + if( iHeight==0 ){ + rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); + }else{ + rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + } + } + } + return rc; +} -/* -** An rtree cursor object. -*/ -struct RtreeCursor { - sqlite3_vtab_cursor base; /* Base class. Must be first */ - u8 atEOF; /* True if at end of search */ - u8 bPoint; /* True if sPoint is valid */ - int iStrategy; /* Copy of idxNum search parameter */ - int nConstraint; /* Number of entries in aConstraint */ - RtreeConstraint *aConstraint; /* Search constraints. */ - int nPointAlloc; /* Number of slots allocated for aPoint[] */ - int nPoint; /* Number of slots used in aPoint[] */ - int mxLevel; /* iLevel value for root of the tree */ - RtreeSearchPoint *aPoint; /* Priority queue for search points */ - RtreeSearchPoint sPoint; /* Cached next search point */ - RtreeNode *aNode[RTREE_CACHE_SZ]; /* Rtree node cache */ - u32 anQueue[RTREE_MAX_DEPTH+1]; /* Number of queued entries by iLevel */ -}; +static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ + int ii; + int rc = SQLITE_OK; + int nCell = NCELL(pNode); -/* Return the Rtree of a RtreeCursor */ -#define RTREE_OF_CURSOR(X) ((Rtree*)((X)->base.pVtab)) + for(ii=0; rc==SQLITE_OK && ii<nCell; ii++){ + RtreeNode *pInsert; + RtreeCell cell; + nodeGetCell(pRtree, pNode, ii, &cell); -/* -** A coordinate can be either a floating point number or a integer. All -** coordinates within a single R-Tree are always of the same time. -*/ -union RtreeCoord { - RtreeValue f; /* Floating point value */ - int i; /* Integer value */ - u32 u; /* Unsigned for byte-order conversions */ -}; + /* Find a node to store this cell in. pNode->iNode currently contains + ** the height of the sub-tree headed by the cell. + */ + rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); + if( rc==SQLITE_OK ){ + int rc2; + rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); + rc2 = nodeRelease(pRtree, pInsert); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } + } + return rc; +} /* -** The argument is an RtreeCoord. Return the value stored within the RtreeCoord -** formatted as a RtreeDValue (double or int64). This macro assumes that local -** variable pRtree points to the Rtree structure associated with the -** RtreeCoord. +** Select a currently unused rowid for a new r-tree record. */ -#ifdef SQLITE_RTREE_INT_ONLY -# define DCOORD(coord) ((RtreeDValue)coord.i) -#else -# define DCOORD(coord) ( \ - (pRtree->eCoordType==RTREE_COORD_REAL32) ? \ - ((double)coord.f) : \ - ((double)coord.i) \ - ) -#endif +static int newRowid(Rtree *pRtree, i64 *piRowid){ + int rc; + sqlite3_bind_null(pRtree->pWriteRowid, 1); + sqlite3_bind_null(pRtree->pWriteRowid, 2); + sqlite3_step(pRtree->pWriteRowid); + rc = sqlite3_reset(pRtree->pWriteRowid); + *piRowid = sqlite3_last_insert_rowid(pRtree->db); + return rc; +} /* -** A search constraint. +** Remove the entry with rowid=iDelete from the r-tree structure. */ -struct RtreeConstraint { - int iCoord; /* Index of constrained coordinate */ - int op; /* Constraining operation */ - union { - RtreeDValue rValue; /* Constraint value. */ - int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*); - int (*xQueryFunc)(sqlite3_rtree_query_info*); - } u; - sqlite3_rtree_query_info *pInfo; /* xGeom and xQueryFunc argument */ -}; +static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ + int rc; /* Return code */ + RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ + int iCell; /* Index of iDelete cell in pLeaf */ + RtreeNode *pRoot; /* Root node of rtree structure */ -/* Possible values for RtreeConstraint.op */ -#define RTREE_EQ 0x41 /* A */ -#define RTREE_LE 0x42 /* B */ -#define RTREE_LT 0x43 /* C */ -#define RTREE_GE 0x44 /* D */ -#define RTREE_GT 0x45 /* E */ -#define RTREE_MATCH 0x46 /* F: Old-style sqlite3_rtree_geometry_callback() */ -#define RTREE_QUERY 0x47 /* G: New-style sqlite3_rtree_query_callback() */ + /* Obtain a reference to the root node to initialize Rtree.iDepth */ + rc = nodeAcquire(pRtree, 1, 0, &pRoot); -/* -** An rtree structure node. -*/ -struct RtreeNode { - RtreeNode *pParent; /* Parent node */ - i64 iNode; /* The node number */ - int nRef; /* Number of references to this node */ - int isDirty; /* True if the node needs to be written to disk */ - u8 *zData; /* Content of the node, as should be on disk */ - RtreeNode *pNext; /* Next node in this hash collision chain */ -}; + /* Obtain a reference to the leaf node that contains the entry + ** about to be deleted. + */ + if( rc==SQLITE_OK ){ + rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); + } -/* Return the number of cells in a node */ -#define NCELL(pNode) readInt16(&(pNode)->zData[2]) + /* Delete the cell in question from the leaf node. */ + if( rc==SQLITE_OK ){ + int rc2; + rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); + if( rc==SQLITE_OK ){ + rc = deleteCell(pRtree, pLeaf, iCell, 0); + } + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } -/* -** A single cell from a node, deserialized -*/ -struct RtreeCell { - i64 iRowid; /* Node or entry ID */ - RtreeCoord aCoord[RTREE_MAX_DIMENSIONS*2]; /* Bounding box coordinates */ -}; + /* Delete the corresponding entry in the <rtree>_rowid table. */ + if( rc==SQLITE_OK ){ + sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); + sqlite3_step(pRtree->pDeleteRowid); + rc = sqlite3_reset(pRtree->pDeleteRowid); + } + /* Check if the root node now has exactly one child. If so, remove + ** it, schedule the contents of the child for reinsertion and + ** reduce the tree height by one. + ** + ** This is equivalent to copying the contents of the child into + ** the root node (the operation that Gutman's paper says to perform + ** in this scenario). + */ + if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ + int rc2; + RtreeNode *pChild; + i64 iChild = nodeGetRowid(pRtree, pRoot, 0); + rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); + if( rc==SQLITE_OK ){ + rc = removeNode(pRtree, pChild, pRtree->iDepth-1); + } + rc2 = nodeRelease(pRtree, pChild); + if( rc==SQLITE_OK ) rc = rc2; + if( rc==SQLITE_OK ){ + pRtree->iDepth--; + writeInt16(pRoot->zData, pRtree->iDepth); + pRoot->isDirty = 1; + } + } -/* -** This object becomes the sqlite3_user_data() for the SQL functions -** that are created by sqlite3_rtree_geometry_callback() and -** sqlite3_rtree_query_callback() and which appear on the right of MATCH -** operators in order to constrain a search. -** -** xGeom and xQueryFunc are the callback functions. Exactly one of -** xGeom and xQueryFunc fields is non-NULL, depending on whether the -** SQL function was created using sqlite3_rtree_geometry_callback() or -** sqlite3_rtree_query_callback(). -** -** This object is deleted automatically by the destructor mechanism in -** sqlite3_create_function_v2(). -*/ -struct RtreeGeomCallback { - int (*xGeom)(sqlite3_rtree_geometry*, int, RtreeDValue*, int*); - int (*xQueryFunc)(sqlite3_rtree_query_info*); - void (*xDestructor)(void*); - void *pContext; -}; + /* Re-insert the contents of any underfull nodes removed from the tree. */ + for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ + if( rc==SQLITE_OK ){ + rc = reinsertNodeContent(pRtree, pLeaf); + } + pRtree->pDeleted = pLeaf->pNext; + sqlite3_free(pLeaf); + } + /* Release the reference to the root node. */ + if( rc==SQLITE_OK ){ + rc = nodeRelease(pRtree, pRoot); + }else{ + nodeRelease(pRtree, pRoot); + } -/* -** Value for the first field of every RtreeMatchArg object. The MATCH -** operator tests that the first field of a blob operand matches this -** value to avoid operating on invalid blobs (which could cause a segfault). -*/ -#define RTREE_GEOMETRY_MAGIC 0x891245AB + return rc; +} /* -** An instance of this structure (in the form of a BLOB) is returned by -** the SQL functions that sqlite3_rtree_geometry_callback() and -** sqlite3_rtree_query_callback() create, and is read as the right-hand -** operand to the MATCH operator of an R-Tree. +** Rounding constants for float->double conversion. */ -struct RtreeMatchArg { - u32 magic; /* Always RTREE_GEOMETRY_MAGIC */ - RtreeGeomCallback cb; /* Info about the callback functions */ - int nParam; /* Number of parameters to the SQL function */ - RtreeDValue aParam[1]; /* Values for parameters to the SQL function */ -}; - -#ifndef MAX -# define MAX(x,y) ((x) < (y) ? (y) : (x)) -#endif -#ifndef MIN -# define MIN(x,y) ((x) > (y) ? (y) : (x)) -#endif +#define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ +#define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ +#if !defined(SQLITE_RTREE_INT_ONLY) /* -** Functions to deserialize a 16 bit integer, 32 bit real number and -** 64 bit integer. The deserialized value is returned. +** Convert an sqlite3_value into an RtreeValue (presumably a float) +** while taking care to round toward negative or positive, respectively. */ -static int readInt16(u8 *p){ - return (p[0]<<8) + p[1]; -} -static void readCoord(u8 *p, RtreeCoord *pCoord){ - pCoord->u = ( - (((u32)p[0]) << 24) + - (((u32)p[1]) << 16) + - (((u32)p[2]) << 8) + - (((u32)p[3]) << 0) - ); +static RtreeValue rtreeValueDown(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( f>d ){ + f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); + } + return f; } -static i64 readInt64(u8 *p){ - return ( - (((i64)p[0]) << 56) + - (((i64)p[1]) << 48) + - (((i64)p[2]) << 40) + - (((i64)p[3]) << 32) + - (((i64)p[4]) << 24) + - (((i64)p[5]) << 16) + - (((i64)p[6]) << 8) + - (((i64)p[7]) << 0) - ); +static RtreeValue rtreeValueUp(sqlite3_value *v){ + double d = sqlite3_value_double(v); + float f = (float)d; + if( f<d ){ + f = (float)(d*(d<0 ? RNDTOWARDS : RNDAWAY)); + } + return f; } +#endif /* !defined(SQLITE_RTREE_INT_ONLY) */ -/* -** Functions to serialize a 16 bit integer, 32 bit real number and -** 64 bit integer. The value returned is the number of bytes written -** to the argument buffer (always 2, 4 and 8 respectively). -*/ -static int writeInt16(u8 *p, int i){ - p[0] = (i>> 8)&0xFF; - p[1] = (i>> 0)&0xFF; - return 2; -} -static int writeCoord(u8 *p, RtreeCoord *pCoord){ - u32 i; - assert( sizeof(RtreeCoord)==4 ); - assert( sizeof(u32)==4 ); - i = pCoord->u; - p[0] = (i>>24)&0xFF; - p[1] = (i>>16)&0xFF; - p[2] = (i>> 8)&0xFF; - p[3] = (i>> 0)&0xFF; - return 4; -} -static int writeInt64(u8 *p, i64 i){ - p[0] = (i>>56)&0xFF; - p[1] = (i>>48)&0xFF; - p[2] = (i>>40)&0xFF; - p[3] = (i>>32)&0xFF; - p[4] = (i>>24)&0xFF; - p[5] = (i>>16)&0xFF; - p[6] = (i>> 8)&0xFF; - p[7] = (i>> 0)&0xFF; - return 8; -} /* -** Increment the reference count of node p. +** The xUpdate method for rtree module virtual tables. */ -static void nodeReference(RtreeNode *p){ - if( p ){ - p->nRef++; - } -} +static int rtreeUpdate( + sqlite3_vtab *pVtab, + int nData, + sqlite3_value **azData, + sqlite_int64 *pRowid +){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_OK; + RtreeCell cell; /* New cell to insert if nData>1 */ + int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ -/* -** Clear the content of node p (set all bytes to 0x00). -*/ -static void nodeZero(Rtree *pRtree, RtreeNode *p){ - memset(&p->zData[2], 0, pRtree->iNodeSize-2); - p->isDirty = 1; -} + rtreeReference(pRtree); + assert(nData>=1); -/* -** Given a node number iNode, return the corresponding key to use -** in the Rtree.aHash table. -*/ -static int nodeHash(i64 iNode){ - return iNode % HASHSIZE; -} + cell.iRowid = 0; /* Used only to suppress a compiler warning */ -/* -** Search the node hash table for node iNode. If found, return a pointer -** to it. Otherwise, return 0. -*/ -static RtreeNode *nodeHashLookup(Rtree *pRtree, i64 iNode){ - RtreeNode *p; - for(p=pRtree->aHash[nodeHash(iNode)]; p && p->iNode!=iNode; p=p->pNext); - return p; -} + /* Constraint handling. A write operation on an r-tree table may return + ** SQLITE_CONSTRAINT for two reasons: + ** + ** 1. A duplicate rowid value, or + ** 2. The supplied data violates the "x2>=x1" constraint. + ** + ** In the first case, if the conflict-handling mode is REPLACE, then + ** the conflicting row can be removed before proceeding. In the second + ** case, SQLITE_CONSTRAINT must be returned regardless of the + ** conflict-handling mode specified by the user. + */ + if( nData>1 ){ + int ii; -/* -** Add node pNode to the node hash table. -*/ -static void nodeHashInsert(Rtree *pRtree, RtreeNode *pNode){ - int iHash; - assert( pNode->pNext==0 ); - iHash = nodeHash(pNode->iNode); - pNode->pNext = pRtree->aHash[iHash]; - pRtree->aHash[iHash] = pNode; -} + /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. + ** + ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared + ** with "column" that are interpreted as table constraints. + ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); + ** This problem was discovered after years of use, so we silently ignore + ** these kinds of misdeclared tables to avoid breaking any legacy. + */ + assert( nData<=(pRtree->nDim*2 + 3) ); -/* -** Remove node pNode from the node hash table. -*/ -static void nodeHashDelete(Rtree *pRtree, RtreeNode *pNode){ - RtreeNode **pp; - if( pNode->iNode!=0 ){ - pp = &pRtree->aHash[nodeHash(pNode->iNode)]; - for( ; (*pp)!=pNode; pp = &(*pp)->pNext){ assert(*pp); } - *pp = pNode->pNext; - pNode->pNext = 0; +#ifndef SQLITE_RTREE_INT_ONLY + if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ + for(ii=0; ii<nData-4; ii+=2){ + cell.aCoord[ii].f = rtreeValueDown(azData[ii+3]); + cell.aCoord[ii+1].f = rtreeValueUp(azData[ii+4]); + if( cell.aCoord[ii].f>cell.aCoord[ii+1].f ){ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + }else +#endif + { + for(ii=0; ii<nData-4; ii+=2){ + cell.aCoord[ii].i = sqlite3_value_int(azData[ii+3]); + cell.aCoord[ii+1].i = sqlite3_value_int(azData[ii+4]); + if( cell.aCoord[ii].i>cell.aCoord[ii+1].i ){ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + } + + /* If a rowid value was supplied, check if it is already present in + ** the table. If so, the constraint has failed. */ + if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){ + cell.iRowid = sqlite3_value_int64(azData[2]); + if( sqlite3_value_type(azData[0])==SQLITE_NULL + || sqlite3_value_int64(azData[0])!=cell.iRowid + ){ + int steprc; + sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); + steprc = sqlite3_step(pRtree->pReadRowid); + rc = sqlite3_reset(pRtree->pReadRowid); + if( SQLITE_ROW==steprc ){ + if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ + rc = rtreeDeleteRowid(pRtree, cell.iRowid); + }else{ + rc = SQLITE_CONSTRAINT; + goto constraint; + } + } + } + bHaveRowid = 1; + } + } + + /* If azData[0] is not an SQL NULL value, it is the rowid of a + ** record to delete from the r-tree table. The following block does + ** just that. + */ + if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){ + rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0])); + } + + /* If the azData[] array contains more than one element, elements + ** (azData[2]..azData[argc-1]) contain a new record to insert into + ** the r-tree structure. + */ + if( rc==SQLITE_OK && nData>1 ){ + /* Insert the new record into the r-tree */ + RtreeNode *pLeaf = 0; + + /* Figure out the rowid of the new row. */ + if( bHaveRowid==0 ){ + rc = newRowid(pRtree, &cell.iRowid); + } + *pRowid = cell.iRowid; + + if( rc==SQLITE_OK ){ + rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); + } + if( rc==SQLITE_OK ){ + int rc2; + pRtree->iReinsertHeight = -1; + rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); + rc2 = nodeRelease(pRtree, pLeaf); + if( rc==SQLITE_OK ){ + rc = rc2; + } + } } + +constraint: + rtreeRelease(pRtree); + return rc; } /* -** Allocate and return new r-tree node. Initially, (RtreeNode.iNode==0), -** indicating that node has not yet been assigned a node number. It is -** assigned a node number when nodeWrite() is called to write the -** node contents out to the database. +** The xRename method for rtree module virtual tables. */ -static RtreeNode *nodeNew(Rtree *pRtree, RtreeNode *pParent){ - RtreeNode *pNode; - pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode) + pRtree->iNodeSize); - if( pNode ){ - memset(pNode, 0, sizeof(RtreeNode) + pRtree->iNodeSize); - pNode->zData = (u8 *)&pNode[1]; - pNode->nRef = 1; - pNode->pParent = pParent; - pNode->isDirty = 1; - nodeReference(pParent); +static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ + Rtree *pRtree = (Rtree *)pVtab; + int rc = SQLITE_NOMEM; + char *zSql = sqlite3_mprintf( + "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" + "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" + "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + , pRtree->zDb, pRtree->zName, zNewName + ); + if( zSql ){ + rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); + sqlite3_free(zSql); } - return pNode; + return rc; } /* -** Obtain a reference to an r-tree node. +** This function populates the pRtree->nRowEst variable with an estimate +** of the number of rows in the virtual table. If possible, this is based +** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. */ -static int nodeAcquire( - Rtree *pRtree, /* R-tree structure */ - i64 iNode, /* Node number to load */ - RtreeNode *pParent, /* Either the parent node or NULL */ - RtreeNode **ppNode /* OUT: Acquired node */ -){ +static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ + const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; + char *zSql; + sqlite3_stmt *p; int rc; - int rc2 = SQLITE_OK; - RtreeNode *pNode; + i64 nRow = 0; - /* Check if the requested node is already in the hash table. If so, - ** increase its reference count and return it. - */ - if( (pNode = nodeHashLookup(pRtree, iNode)) ){ - assert( !pParent || !pNode->pParent || pNode->pParent==pParent ); - if( pParent && !pNode->pParent ){ - nodeReference(pParent); - pNode->pParent = pParent; + zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); + if( rc==SQLITE_OK ){ + if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); + rc = sqlite3_finalize(p); + }else if( rc!=SQLITE_NOMEM ){ + rc = SQLITE_OK; } - pNode->nRef++; - *ppNode = pNode; - return SQLITE_OK; - } - sqlite3_bind_int64(pRtree->pReadNode, 1, iNode); - rc = sqlite3_step(pRtree->pReadNode); - if( rc==SQLITE_ROW ){ - const u8 *zBlob = sqlite3_column_blob(pRtree->pReadNode, 0); - if( pRtree->iNodeSize==sqlite3_column_bytes(pRtree->pReadNode, 0) ){ - pNode = (RtreeNode *)sqlite3_malloc(sizeof(RtreeNode)+pRtree->iNodeSize); - if( !pNode ){ - rc2 = SQLITE_NOMEM; + if( rc==SQLITE_OK ){ + if( nRow==0 ){ + pRtree->nRowEst = RTREE_DEFAULT_ROWEST; }else{ - pNode->pParent = pParent; - pNode->zData = (u8 *)&pNode[1]; - pNode->nRef = 1; - pNode->iNode = iNode; - pNode->isDirty = 0; - pNode->pNext = 0; - memcpy(pNode->zData, zBlob, pRtree->iNodeSize); - nodeReference(pParent); + pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); } } + sqlite3_free(zSql); } - rc = sqlite3_reset(pRtree->pReadNode); - if( rc==SQLITE_OK ) rc = rc2; - /* If the root node was just loaded, set pRtree->iDepth to the height - ** of the r-tree structure. A height of zero means all data is stored on - ** the root node. A height of one means the children of the root node - ** are the leaves, and so on. If the depth as specified on the root node - ** is greater than RTREE_MAX_DEPTH, the r-tree structure must be corrupt. - */ - if( pNode && iNode==1 ){ - pRtree->iDepth = readInt16(pNode->zData); - if( pRtree->iDepth>RTREE_MAX_DEPTH ){ - rc = SQLITE_CORRUPT_VTAB; - } - } + return rc; +} - /* If no error has occurred so far, check if the "number of entries" - ** field on the node is too large. If so, set the return code to - ** SQLITE_CORRUPT_VTAB. - */ - if( pNode && rc==SQLITE_OK ){ - if( NCELL(pNode)>((pRtree->iNodeSize-4)/pRtree->nBytesPerCell) ){ - rc = SQLITE_CORRUPT_VTAB; +static sqlite3_module rtreeModule = { + 0, /* iVersion */ + rtreeCreate, /* xCreate - create a table */ + rtreeConnect, /* xConnect - connect to an existing table */ + rtreeBestIndex, /* xBestIndex - Determine search strategy */ + rtreeDisconnect, /* xDisconnect - Disconnect from a table */ + rtreeDestroy, /* xDestroy - Drop a table */ + rtreeOpen, /* xOpen - open a cursor */ + rtreeClose, /* xClose - close a cursor */ + rtreeFilter, /* xFilter - configure scan constraints */ + rtreeNext, /* xNext - advance a cursor */ + rtreeEof, /* xEof */ + rtreeColumn, /* xColumn - read data */ + rtreeRowid, /* xRowid - read data */ + rtreeUpdate, /* xUpdate - write data */ + 0, /* xBegin - begin transaction */ + 0, /* xSync - sync transaction */ + 0, /* xCommit - commit transaction */ + 0, /* xRollback - rollback transaction */ + 0, /* xFindFunction - function overloading */ + rtreeRename, /* xRename - rename the table */ + 0, /* xSavepoint */ + 0, /* xRelease */ + 0 /* xRollbackTo */ +}; + +static int rtreeSqlInit( + Rtree *pRtree, + sqlite3 *db, + const char *zDb, + const char *zPrefix, + int isCreate +){ + int rc = SQLITE_OK; + + #define N_STATEMENT 9 + static const char *azSql[N_STATEMENT] = { + /* Read and write the xxx_node table */ + "SELECT data FROM '%q'.'%q_node' WHERE nodeno = :1", + "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1", + + /* Read and write the xxx_rowid table */ + "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1", + "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1", + + /* Read and write the xxx_parent table */ + "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1", + "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)", + "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1" + }; + sqlite3_stmt **appStmt[N_STATEMENT]; + int i; + + pRtree->db = db; + + if( isCreate ){ + char *zCreate = sqlite3_mprintf( +"CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);" +"CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);" +"CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY," + " parentnode INTEGER);" +"INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))", + zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize + ); + if( !zCreate ){ + return SQLITE_NOMEM; + } + rc = sqlite3_exec(db, zCreate, 0, 0, 0); + sqlite3_free(zCreate); + if( rc!=SQLITE_OK ){ + return rc; } } - if( rc==SQLITE_OK ){ - if( pNode!=0 ){ - nodeHashInsert(pRtree, pNode); + appStmt[0] = &pRtree->pReadNode; + appStmt[1] = &pRtree->pWriteNode; + appStmt[2] = &pRtree->pDeleteNode; + appStmt[3] = &pRtree->pReadRowid; + appStmt[4] = &pRtree->pWriteRowid; + appStmt[5] = &pRtree->pDeleteRowid; + appStmt[6] = &pRtree->pReadParent; + appStmt[7] = &pRtree->pWriteParent; + appStmt[8] = &pRtree->pDeleteParent; + + rc = rtreeQueryStat1(db, pRtree); + for(i=0; i<N_STATEMENT && rc==SQLITE_OK; i++){ + char *zSql = sqlite3_mprintf(azSql[i], zDb, zPrefix); + if( zSql ){ + rc = sqlite3_prepare_v2(db, zSql, -1, appStmt[i], 0); }else{ - rc = SQLITE_CORRUPT_VTAB; + rc = SQLITE_NOMEM; } - *ppNode = pNode; - }else{ - sqlite3_free(pNode); - *ppNode = 0; + sqlite3_free(zSql); } return rc; } /* -** Overwrite cell iCell of node pNode with the contents of pCell. +** The second argument to this function contains the text of an SQL statement +** that returns a single integer value. The statement is compiled and executed +** using database connection db. If successful, the integer value returned +** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error +** code is returned and the value of *piVal after returning is not defined. */ -static void nodeOverwriteCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node into which the cell is to be written */ - RtreeCell *pCell, /* The cell to write */ - int iCell /* Index into pNode into which pCell is written */ -){ - int ii; - u8 *p = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; - p += writeInt64(p, pCell->iRowid); - for(ii=0; ii<(pRtree->nDim*2); ii++){ - p += writeCoord(p, &pCell->aCoord[ii]); +static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ + int rc = SQLITE_NOMEM; + if( zSql ){ + sqlite3_stmt *pStmt = 0; + rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); + if( rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + *piVal = sqlite3_column_int(pStmt, 0); + } + rc = sqlite3_finalize(pStmt); + } } - pNode->isDirty = 1; + return rc; } /* -** Remove the cell with index iCell from node pNode. +** This function is called from within the xConnect() or xCreate() method to +** determine the node-size used by the rtree table being created or connected +** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. +** Otherwise, an SQLite error code is returned. +** +** If this function is being called as part of an xConnect(), then the rtree +** table already exists. In this case the node-size is determined by inspecting +** the root node of the tree. +** +** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. +** This ensures that each node is stored on a single database page. If the +** database page-size is so large that more than RTREE_MAXCELLS entries +** would fit in a single node, use a smaller node-size. */ -static void nodeDeleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell){ - u8 *pDst = &pNode->zData[4 + pRtree->nBytesPerCell*iCell]; - u8 *pSrc = &pDst[pRtree->nBytesPerCell]; - int nByte = (NCELL(pNode) - iCell - 1) * pRtree->nBytesPerCell; - memmove(pDst, pSrc, nByte); - writeInt16(&pNode->zData[2], NCELL(pNode)-1); - pNode->isDirty = 1; +static int getNodeSize( + sqlite3 *db, /* Database handle */ + Rtree *pRtree, /* Rtree handle */ + int isCreate, /* True for xCreate, false for xConnect */ + char **pzErr /* OUT: Error message, if any */ +){ + int rc; + char *zSql; + if( isCreate ){ + int iPageSize = 0; + zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); + rc = getIntFromStmt(db, zSql, &iPageSize); + if( rc==SQLITE_OK ){ + pRtree->iNodeSize = iPageSize-64; + if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){ + pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; + } + }else{ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + }else{ + zSql = sqlite3_mprintf( + "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", + pRtree->zDb, pRtree->zName + ); + rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); + if( rc!=SQLITE_OK ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } + } + + sqlite3_free(zSql); + return rc; } -/* -** Insert the contents of cell pCell into node pNode. If the insert -** is successful, return SQLITE_OK. +/* +** This function is the implementation of both the xConnect and xCreate +** methods of the r-tree virtual table. ** -** If there is not enough free space in pNode, return SQLITE_FULL. +** argv[0] -> module name +** argv[1] -> database name +** argv[2] -> table name +** argv[...] -> column names... */ -static int nodeInsertCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* Write new cell into this node */ - RtreeCell *pCell /* The cell to be inserted */ +static int rtreeInit( + sqlite3 *db, /* Database connection */ + void *pAux, /* One of the RTREE_COORD_* constants */ + int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ + sqlite3_vtab **ppVtab, /* OUT: New virtual table */ + char **pzErr, /* OUT: Error message, if any */ + int isCreate /* True for xCreate, false for xConnect */ ){ - int nCell; /* Current number of cells in pNode */ - int nMaxCell; /* Maximum number of cells for pNode */ + int rc = SQLITE_OK; + Rtree *pRtree; + int nDb; /* Length of string argv[1] */ + int nName; /* Length of string argv[2] */ + int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); - nMaxCell = (pRtree->iNodeSize-4)/pRtree->nBytesPerCell; - nCell = NCELL(pNode); + const char *aErrMsg[] = { + 0, /* 0 */ + "Wrong number of columns for an rtree table", /* 1 */ + "Too few columns for an rtree table", /* 2 */ + "Too many columns for an rtree table" /* 3 */ + }; - assert( nCell<=nMaxCell ); - if( nCell<nMaxCell ){ - nodeOverwriteCell(pRtree, pNode, pCell, nCell); - writeInt16(&pNode->zData[2], nCell+1); - pNode->isDirty = 1; + int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2; + if( aErrMsg[iErr] ){ + *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); + return SQLITE_ERROR; } - return (nCell==nMaxCell); -} + sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); -/* -** If the node is dirty, write it out to the database. -*/ -static int nodeWrite(Rtree *pRtree, RtreeNode *pNode){ - int rc = SQLITE_OK; - if( pNode->isDirty ){ - sqlite3_stmt *p = pRtree->pWriteNode; - if( pNode->iNode ){ - sqlite3_bind_int64(p, 1, pNode->iNode); - }else{ - sqlite3_bind_null(p, 1); - } - sqlite3_bind_blob(p, 2, pNode->zData, pRtree->iNodeSize, SQLITE_STATIC); - sqlite3_step(p); - pNode->isDirty = 0; - rc = sqlite3_reset(p); - if( pNode->iNode==0 && rc==SQLITE_OK ){ - pNode->iNode = sqlite3_last_insert_rowid(pRtree->db); - nodeHashInsert(pRtree, pNode); - } + /* Allocate the sqlite3_vtab structure */ + nDb = (int)strlen(argv[1]); + nName = (int)strlen(argv[2]); + pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2); + if( !pRtree ){ + return SQLITE_NOMEM; } - return rc; -} + memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); + pRtree->nBusy = 1; + pRtree->base.pModule = &rtreeModule; + pRtree->zDb = (char *)&pRtree[1]; + pRtree->zName = &pRtree->zDb[nDb+1]; + pRtree->nDim = (argc-4)/2; + pRtree->nBytesPerCell = 8 + pRtree->nDim*4*2; + pRtree->eCoordType = eCoordType; + memcpy(pRtree->zDb, argv[1], nDb); + memcpy(pRtree->zName, argv[2], nName); -/* -** Release a reference to a node. If the node is dirty and the reference -** count drops to zero, the node data is written to the database. -*/ -static int nodeRelease(Rtree *pRtree, RtreeNode *pNode){ - int rc = SQLITE_OK; - if( pNode ){ - assert( pNode->nRef>0 ); - pNode->nRef--; - if( pNode->nRef==0 ){ - if( pNode->iNode==1 ){ - pRtree->iDepth = -1; + /* Figure out the node size to use. */ + rc = getNodeSize(db, pRtree, isCreate, pzErr); + + /* Create/Connect to the underlying relational database schema. If + ** that is successful, call sqlite3_declare_vtab() to configure + ** the r-tree table schema. + */ + if( rc==SQLITE_OK ){ + if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + }else{ + char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]); + char *zTmp; + int ii; + for(ii=4; zSql && ii<argc; ii++){ + zTmp = zSql; + zSql = sqlite3_mprintf("%s, %s", zTmp, argv[ii]); + sqlite3_free(zTmp); } - if( pNode->pParent ){ - rc = nodeRelease(pRtree, pNode->pParent); + if( zSql ){ + zTmp = zSql; + zSql = sqlite3_mprintf("%s);", zTmp); + sqlite3_free(zTmp); } - if( rc==SQLITE_OK ){ - rc = nodeWrite(pRtree, pNode); + if( !zSql ){ + rc = SQLITE_NOMEM; + }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){ + *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); } - nodeHashDelete(pRtree, pNode); - sqlite3_free(pNode); + sqlite3_free(zSql); } } - return rc; -} -/* -** Return the 64-bit integer value associated with cell iCell of -** node pNode. If pNode is a leaf node, this is a rowid. If it is -** an internal node, then the 64-bit integer is a child page number. -*/ -static i64 nodeGetRowid( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node from which to extract the ID */ - int iCell /* The cell index from which to extract the ID */ -){ - assert( iCell<NCELL(pNode) ); - return readInt64(&pNode->zData[4 + pRtree->nBytesPerCell*iCell]); + if( rc==SQLITE_OK ){ + *ppVtab = (sqlite3_vtab *)pRtree; + }else{ + assert( *ppVtab==0 ); + assert( pRtree->nBusy==1 ); + rtreeRelease(pRtree); + } + return rc; } -/* -** Return coordinate iCoord from cell iCell in node pNode. -*/ -static void nodeGetCoord( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node from which to extract a coordinate */ - int iCell, /* The index of the cell within the node */ - int iCoord, /* Which coordinate to extract */ - RtreeCoord *pCoord /* OUT: Space to write result to */ -){ - readCoord(&pNode->zData[12 + pRtree->nBytesPerCell*iCell + 4*iCoord], pCoord); -} /* -** Deserialize cell iCell of node pNode. Populate the structure pointed -** to by pCell with the results. +** Implementation of a scalar function that decodes r-tree nodes to +** human readable strings. This can be used for debugging and analysis. +** +** The scalar function takes two arguments: (1) the number of dimensions +** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing +** an r-tree node. For a two-dimensional r-tree structure called "rt", to +** deserialize all nodes, a statement like: +** +** SELECT rtreenode(2, data) FROM rt_node; +** +** The human readable string takes the form of a Tcl list with one +** entry for each cell in the r-tree node. Each entry is itself a +** list, containing the 8-byte rowid/pageno followed by the +** <num-dimension>*2 coordinates. */ -static void nodeGetCell( - Rtree *pRtree, /* The overall R-Tree */ - RtreeNode *pNode, /* The node containing the cell to be read */ - int iCell, /* Index of the cell within the node */ - RtreeCell *pCell /* OUT: Write the cell contents here */ -){ - u8 *pData; - RtreeCoord *pCoord; +static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ + char *zText = 0; + RtreeNode node; + Rtree tree; int ii; - pCell->iRowid = nodeGetRowid(pRtree, pNode, iCell); - pData = pNode->zData + (12 + pRtree->nBytesPerCell*iCell); - pCoord = pCell->aCoord; - for(ii=0; ii<pRtree->nDim*2; ii++){ - readCoord(&pData[ii*4], &pCoord[ii]); - } -} - - -/* Forward declaration for the function that does the work of -** the virtual table module xCreate() and xConnect() methods. -*/ -static int rtreeInit( - sqlite3 *, void *, int, const char *const*, sqlite3_vtab **, char **, int -); -/* -** Rtree virtual table module xCreate method. -*/ -static int rtreeCreate( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 1); -} + UNUSED_PARAMETER(nArg); + memset(&node, 0, sizeof(RtreeNode)); + memset(&tree, 0, sizeof(Rtree)); + tree.nDim = sqlite3_value_int(apArg[0]); + tree.nBytesPerCell = 8 + 8 * tree.nDim; + node.zData = (u8 *)sqlite3_value_blob(apArg[1]); -/* -** Rtree virtual table module xConnect method. -*/ -static int rtreeConnect( - sqlite3 *db, - void *pAux, - int argc, const char *const*argv, - sqlite3_vtab **ppVtab, - char **pzErr -){ - return rtreeInit(db, pAux, argc, argv, ppVtab, pzErr, 0); -} + for(ii=0; ii<NCELL(&node); ii++){ + char zCell[512]; + int nCell = 0; + RtreeCell cell; + int jj; -/* -** Increment the r-tree reference count. -*/ -static void rtreeReference(Rtree *pRtree){ - pRtree->nBusy++; -} + nodeGetCell(&tree, &node, ii, &cell); + sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid); + nCell = (int)strlen(zCell); + for(jj=0; jj<tree.nDim*2; jj++){ +#ifndef SQLITE_RTREE_INT_ONLY + sqlite3_snprintf(512-nCell,&zCell[nCell], " %g", + (double)cell.aCoord[jj].f); +#else + sqlite3_snprintf(512-nCell,&zCell[nCell], " %d", + cell.aCoord[jj].i); +#endif + nCell = (int)strlen(zCell); + } -/* -** Decrement the r-tree reference count. When the reference count reaches -** zero the structure is deleted. -*/ -static void rtreeRelease(Rtree *pRtree){ - pRtree->nBusy--; - if( pRtree->nBusy==0 ){ - sqlite3_finalize(pRtree->pReadNode); - sqlite3_finalize(pRtree->pWriteNode); - sqlite3_finalize(pRtree->pDeleteNode); - sqlite3_finalize(pRtree->pReadRowid); - sqlite3_finalize(pRtree->pWriteRowid); - sqlite3_finalize(pRtree->pDeleteRowid); - sqlite3_finalize(pRtree->pReadParent); - sqlite3_finalize(pRtree->pWriteParent); - sqlite3_finalize(pRtree->pDeleteParent); - sqlite3_free(pRtree); + if( zText ){ + char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell); + sqlite3_free(zText); + zText = zTextNew; + }else{ + zText = sqlite3_mprintf("{%s}", zCell); + } } + + sqlite3_result_text(ctx, zText, -1, sqlite3_free); } -/* -** Rtree virtual table module xDisconnect method. +/* This routine implements an SQL function that returns the "depth" parameter +** from the front of a blob that is an r-tree node. For example: +** +** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; +** +** The depth value is 0 for all nodes other than the root node, and the root +** node always has nodeno=1, so the example above is the primary use for this +** routine. This routine is intended for testing and analysis only. */ -static int rtreeDisconnect(sqlite3_vtab *pVtab){ - rtreeRelease((Rtree *)pVtab); - return SQLITE_OK; +static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ + UNUSED_PARAMETER(nArg); + if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB + || sqlite3_value_bytes(apArg[0])<2 + ){ + sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); + }else{ + u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]); + sqlite3_result_int(ctx, readInt16(zBlob)); + } } -/* -** Rtree virtual table module xDestroy method. +/* +** Register the r-tree module with database handle db. This creates the +** virtual table module "rtree" and the debugging/analysis scalar +** function "rtreenode". */ -static int rtreeDestroy(sqlite3_vtab *pVtab){ - Rtree *pRtree = (Rtree *)pVtab; +SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){ + const int utf8 = SQLITE_UTF8; int rc; - char *zCreate = sqlite3_mprintf( - "DROP TABLE '%q'.'%q_node';" - "DROP TABLE '%q'.'%q_rowid';" - "DROP TABLE '%q'.'%q_parent';", - pRtree->zDb, pRtree->zName, - pRtree->zDb, pRtree->zName, - pRtree->zDb, pRtree->zName - ); - if( !zCreate ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_exec(pRtree->db, zCreate, 0, 0, 0); - sqlite3_free(zCreate); + + rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); + if( rc==SQLITE_OK ){ + rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); } if( rc==SQLITE_OK ){ - rtreeRelease(pRtree); +#ifdef SQLITE_RTREE_INT_ONLY + void *c = (void *)RTREE_COORD_INT32; +#else + void *c = (void *)RTREE_COORD_REAL32; +#endif + rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0); + } + if( rc==SQLITE_OK ){ + void *c = (void *)RTREE_COORD_INT32; + rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); } return rc; } -/* -** Rtree virtual table module xOpen method. +/* +** This routine deletes the RtreeGeomCallback object that was attached +** one of the SQL functions create by sqlite3_rtree_geometry_callback() +** or sqlite3_rtree_query_callback(). In other words, this routine is the +** destructor for an RtreeGeomCallback objecct. This routine is called when +** the corresponding SQL function is deleted. */ -static int rtreeOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ - int rc = SQLITE_NOMEM; - RtreeCursor *pCsr; +static void rtreeFreeCallback(void *p){ + RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p; + if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext); + sqlite3_free(p); +} - pCsr = (RtreeCursor *)sqlite3_malloc(sizeof(RtreeCursor)); - if( pCsr ){ - memset(pCsr, 0, sizeof(RtreeCursor)); - pCsr->base.pVtab = pVTab; - rc = SQLITE_OK; +/* +** This routine frees the BLOB that is returned by geomCallback(). +*/ +static void rtreeMatchArgFree(void *pArg){ + int i; + RtreeMatchArg *p = (RtreeMatchArg*)pArg; + for(i=0; i<p->nParam; i++){ + sqlite3_value_free(p->apSqlParam[i]); } - *ppCursor = (sqlite3_vtab_cursor *)pCsr; - - return rc; + sqlite3_free(p); } - /* -** Free the RtreeCursor.aConstraint[] array and its contents. +** Each call to sqlite3_rtree_geometry_callback() or +** sqlite3_rtree_query_callback() creates an ordinary SQLite +** scalar function that is implemented by this routine. +** +** All this function does is construct an RtreeMatchArg object that +** contains the geometry-checking callback routines and a list of +** parameters to this function, then return that RtreeMatchArg object +** as a BLOB. +** +** The R-Tree MATCH operator will read the returned BLOB, deserialize +** the RtreeMatchArg object, and use the RtreeMatchArg object to figure +** out which elements of the R-Tree should be returned by the query. */ -static void freeCursorConstraints(RtreeCursor *pCsr){ - if( pCsr->aConstraint ){ - int i; /* Used to iterate through constraint array */ - for(i=0; i<pCsr->nConstraint; i++){ - sqlite3_rtree_query_info *pInfo = pCsr->aConstraint[i].pInfo; - if( pInfo ){ - if( pInfo->xDelUser ) pInfo->xDelUser(pInfo->pUser); - sqlite3_free(pInfo); - } +static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ + RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); + RtreeMatchArg *pBlob; + int nBlob; + int memErr = 0; + + nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue) + + nArg*sizeof(sqlite3_value*); + pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); + if( !pBlob ){ + sqlite3_result_error_nomem(ctx); + }else{ + int i; + pBlob->magic = RTREE_GEOMETRY_MAGIC; + pBlob->cb = pGeomCtx[0]; + pBlob->apSqlParam = (sqlite3_value**)&pBlob->aParam[nArg]; + pBlob->nParam = nArg; + for(i=0; i<nArg; i++){ + pBlob->apSqlParam[i] = sqlite3_value_dup(aArg[i]); + if( pBlob->apSqlParam[i]==0 ) memErr = 1; +#ifdef SQLITE_RTREE_INT_ONLY + pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); +#else + pBlob->aParam[i] = sqlite3_value_double(aArg[i]); +#endif + } + if( memErr ){ + sqlite3_result_error_nomem(ctx); + rtreeMatchArgFree(pBlob); + }else{ + sqlite3_result_blob(ctx, pBlob, nBlob, rtreeMatchArgFree); } - sqlite3_free(pCsr->aConstraint); - pCsr->aConstraint = 0; } } -/* -** Rtree virtual table module xClose method. +/* +** Register a new geometry function for use with the r-tree MATCH operator. */ -static int rtreeClose(sqlite3_vtab_cursor *cur){ - Rtree *pRtree = (Rtree *)(cur->pVtab); - int ii; - RtreeCursor *pCsr = (RtreeCursor *)cur; - freeCursorConstraints(pCsr); - sqlite3_free(pCsr->aPoint); - for(ii=0; ii<RTREE_CACHE_SZ; ii++) nodeRelease(pRtree, pCsr->aNode[ii]); - sqlite3_free(pCsr); - return SQLITE_OK; +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_geometry_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zGeom, /* Name of the new SQL function */ + int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ + void *pContext /* Extra data associated with the callback */ +){ + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = xGeom; + pGeomCtx->xQueryFunc = 0; + pGeomCtx->xDestructor = 0; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); } /* -** Rtree virtual table module xEof method. -** -** Return non-zero if the cursor does not currently point to a valid -** record (i.e if the scan has finished), or zero otherwise. +** Register a new 2nd-generation geometry function for use with the +** r-tree MATCH operator. */ -static int rtreeEof(sqlite3_vtab_cursor *cur){ - RtreeCursor *pCsr = (RtreeCursor *)cur; - return pCsr->atEOF; +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_query_callback( + sqlite3 *db, /* Register SQL function on this connection */ + const char *zQueryFunc, /* Name of new SQL function */ + int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ + void *pContext, /* Extra data passed into the callback */ + void (*xDestructor)(void*) /* Destructor for the extra data */ +){ + RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ + + /* Allocate and populate the context object. */ + pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); + if( !pGeomCtx ) return SQLITE_NOMEM; + pGeomCtx->xGeom = 0; + pGeomCtx->xQueryFunc = xQueryFunc; + pGeomCtx->xDestructor = xDestructor; + pGeomCtx->pContext = pContext; + return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, + (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + ); +} + +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int SQLITE_STDCALL sqlite3_rtree_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi +){ + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3RtreeInit(db); } +#endif + +#endif +/************** End of rtree.c ***********************************************/ +/************** Begin file icu.c *********************************************/ /* -** Convert raw bits from the on-disk RTree record into a coordinate value. -** The on-disk format is big-endian and needs to be converted for little- -** endian platforms. The on-disk record stores integer coordinates if -** eInt is true and it stores 32-bit floating point records if eInt is -** false. a[] is the four bytes of the on-disk record to be decoded. -** Store the results in "r". +** 2007 May 6 ** -** There are three versions of this macro, one each for little-endian and -** big-endian processors and a third generic implementation. The endian- -** specific implementations are much faster and are preferred if the -** processor endianness is known at compile-time. The SQLITE_BYTEORDER -** macro is part of sqliteInt.h and hence the endian-specific -** implementation will only be used if this module is compiled as part -** of the amalgamation. +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ +** +** This file implements an integration between the ICU library +** ("International Components for Unicode", an open-source library +** for handling unicode data) and SQLite. The integration uses +** ICU to provide the following to SQLite: +** +** * An implementation of the SQL regexp() function (and hence REGEXP +** operator) using the ICU uregex_XX() APIs. +** +** * Implementations of the SQL scalar upper() and lower() functions +** for case mapping. +** +** * Integration of ICU and SQLite collation sequences. +** +** * An implementation of the LIKE operator that uses ICU to +** provide case-independent matching. */ -#if defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==1234 -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - memcpy(&c.u,a,4); \ - c.u = ((c.u>>24)&0xff)|((c.u>>8)&0xff00)| \ - ((c.u&0xff)<<24)|((c.u&0xff00)<<8); \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ -} -#elif defined(SQLITE_BYTEORDER) && SQLITE_BYTEORDER==4321 -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - memcpy(&c.u,a,4); \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ -} + +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) + +/* Include ICU headers */ +#include <unicode/utypes.h> +#include <unicode/uregex.h> +#include <unicode/ustring.h> +#include <unicode/ucol.h> + +/* #include <assert.h> */ + +#ifndef SQLITE_CORE +/* #include "sqlite3ext.h" */ + SQLITE_EXTENSION_INIT1 #else -#define RTREE_DECODE_COORD(eInt, a, r) { \ - RtreeCoord c; /* Coordinate decoded */ \ - c.u = ((u32)a[0]<<24) + ((u32)a[1]<<16) \ - +((u32)a[2]<<8) + a[3]; \ - r = eInt ? (sqlite3_rtree_dbl)c.i : (sqlite3_rtree_dbl)c.f; \ -} +/* #include "sqlite3.h" */ #endif /* -** Check the RTree node or entry given by pCellData and p against the MATCH -** constraint pConstraint. +** Maximum length (in bytes) of the pattern in a LIKE or GLOB +** operator. */ -static int rtreeCallbackConstraint( - RtreeConstraint *pConstraint, /* The constraint to test */ - int eInt, /* True if RTree holding integer coordinates */ - u8 *pCellData, /* Raw cell content */ - RtreeSearchPoint *pSearch, /* Container of this cell */ - sqlite3_rtree_dbl *prScore, /* OUT: score for the cell */ - int *peWithin /* OUT: visibility of the cell */ -){ - int i; /* Loop counter */ - sqlite3_rtree_query_info *pInfo = pConstraint->pInfo; /* Callback info */ - int nCoord = pInfo->nCoord; /* No. of coordinates */ - int rc; /* Callback return code */ - sqlite3_rtree_dbl aCoord[RTREE_MAX_DIMENSIONS*2]; /* Decoded coordinates */ - - assert( pConstraint->op==RTREE_MATCH || pConstraint->op==RTREE_QUERY ); - assert( nCoord==2 || nCoord==4 || nCoord==6 || nCoord==8 || nCoord==10 ); +#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH +# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 +#endif - if( pConstraint->op==RTREE_QUERY && pSearch->iLevel==1 ){ - pInfo->iRowid = readInt64(pCellData); - } - pCellData += 8; - for(i=0; i<nCoord; i++, pCellData += 4){ - RTREE_DECODE_COORD(eInt, pCellData, aCoord[i]); - } - if( pConstraint->op==RTREE_MATCH ){ - rc = pConstraint->u.xGeom((sqlite3_rtree_geometry*)pInfo, - nCoord, aCoord, &i); - if( i==0 ) *peWithin = NOT_WITHIN; - *prScore = RTREE_ZERO; - }else{ - pInfo->aCoord = aCoord; - pInfo->iLevel = pSearch->iLevel - 1; - pInfo->rScore = pInfo->rParentScore = pSearch->rScore; - pInfo->eWithin = pInfo->eParentWithin = pSearch->eWithin; - rc = pConstraint->u.xQueryFunc(pInfo); - if( pInfo->eWithin<*peWithin ) *peWithin = pInfo->eWithin; - if( pInfo->rScore<*prScore || *prScore<RTREE_ZERO ){ - *prScore = pInfo->rScore; - } - } - return rc; +/* +** Version of sqlite3_free() that is always a function, never a macro. +*/ +static void xFree(void *p){ + sqlite3_free(p); } -/* -** Check the internal RTree node given by pCellData against constraint p. -** If this constraint cannot be satisfied by any child within the node, -** set *peWithin to NOT_WITHIN. +/* +** Compare two UTF-8 strings for equality where the first string is +** a "LIKE" expression. Return true (1) if they are the same and +** false (0) if they are different. */ -static void rtreeNonleafConstraint( - RtreeConstraint *p, /* The constraint to test */ - int eInt, /* True if RTree holds integer coordinates */ - u8 *pCellData, /* Raw cell content as appears on disk */ - int *peWithin /* Adjust downward, as appropriate */ +static int icuLikeCompare( + const uint8_t *zPattern, /* LIKE pattern */ + const uint8_t *zString, /* The UTF-8 string to compare against */ + const UChar32 uEsc /* The escape character */ ){ - sqlite3_rtree_dbl val; /* Coordinate value convert to a double */ + static const int MATCH_ONE = (UChar32)'_'; + static const int MATCH_ALL = (UChar32)'%'; - /* p->iCoord might point to either a lower or upper bound coordinate - ** in a coordinate pair. But make pCellData point to the lower bound. - */ - pCellData += 8 + 4*(p->iCoord&0xfe); + int iPattern = 0; /* Current byte index in zPattern */ + int iString = 0; /* Current byte index in zString */ - assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ ); - switch( p->op ){ - case RTREE_LE: - case RTREE_LT: - case RTREE_EQ: - RTREE_DECODE_COORD(eInt, pCellData, val); - /* val now holds the lower bound of the coordinate pair */ - if( p->u.rValue>=val ) return; - if( p->op!=RTREE_EQ ) break; /* RTREE_LE and RTREE_LT end here */ - /* Fall through for the RTREE_EQ case */ + int prevEscape = 0; /* True if the previous character was uEsc */ - default: /* RTREE_GT or RTREE_GE, or fallthrough of RTREE_EQ */ - pCellData += 4; - RTREE_DECODE_COORD(eInt, pCellData, val); - /* val now holds the upper bound of the coordinate pair */ - if( p->u.rValue<=val ) return; + while( zPattern[iPattern]!=0 ){ + + /* Read (and consume) the next character from the input pattern. */ + UChar32 uPattern; + U8_NEXT_UNSAFE(zPattern, iPattern, uPattern); + + /* There are now 4 possibilities: + ** + ** 1. uPattern is an unescaped match-all character "%", + ** 2. uPattern is an unescaped match-one character "_", + ** 3. uPattern is an unescaped escape character, or + ** 4. uPattern is to be handled as an ordinary character + */ + if( !prevEscape && uPattern==MATCH_ALL ){ + /* Case 1. */ + uint8_t c; + + /* Skip any MATCH_ALL or MATCH_ONE characters that follow a + ** MATCH_ALL. For each MATCH_ONE, skip one character in the + ** test string. + */ + while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ + if( c==MATCH_ONE ){ + if( zString[iString]==0 ) return 0; + U8_FWD_1_UNSAFE(zString, iString); + } + iPattern++; + } + + if( zPattern[iPattern]==0 ) return 1; + + while( zString[iString] ){ + if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ + return 1; + } + U8_FWD_1_UNSAFE(zString, iString); + } + return 0; + + }else if( !prevEscape && uPattern==MATCH_ONE ){ + /* Case 2. */ + if( zString[iString]==0 ) return 0; + U8_FWD_1_UNSAFE(zString, iString); + + }else if( !prevEscape && uPattern==uEsc){ + /* Case 3. */ + prevEscape = 1; + + }else{ + /* Case 4. */ + UChar32 uString; + U8_NEXT_UNSAFE(zString, iString, uString); + uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); + uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); + if( uString!=uPattern ){ + return 0; + } + prevEscape = 0; + } } - *peWithin = NOT_WITHIN; + + return zString[iString]==0; } /* -** Check the leaf RTree cell given by pCellData against constraint p. -** If this constraint is not satisfied, set *peWithin to NOT_WITHIN. -** If the constraint is satisfied, leave *peWithin unchanged. +** Implementation of the like() SQL function. This function implements +** the build-in LIKE operator. The first argument to the function is the +** pattern and the second argument is the string. So, the SQL statements: ** -** The constraint is of the form: xN op $val +** A LIKE B ** -** The op is given by p->op. The xN is p->iCoord-th coordinate in -** pCellData. $val is given by p->u.rValue. +** is implemented as like(B, A). If there is an escape character E, +** +** A LIKE B ESCAPE E +** +** is mapped to like(B, A, E). */ -static void rtreeLeafConstraint( - RtreeConstraint *p, /* The constraint to test */ - int eInt, /* True if RTree holds integer coordinates */ - u8 *pCellData, /* Raw cell content as appears on disk */ - int *peWithin /* Adjust downward, as appropriate */ +static void icuLikeFunc( + sqlite3_context *context, + int argc, + sqlite3_value **argv ){ - RtreeDValue xN; /* Coordinate value converted to a double */ + const unsigned char *zA = sqlite3_value_text(argv[0]); + const unsigned char *zB = sqlite3_value_text(argv[1]); + UChar32 uEsc = 0; - assert(p->op==RTREE_LE || p->op==RTREE_LT || p->op==RTREE_GE - || p->op==RTREE_GT || p->op==RTREE_EQ ); - pCellData += 8 + p->iCoord*4; - RTREE_DECODE_COORD(eInt, pCellData, xN); - switch( p->op ){ - case RTREE_LE: if( xN <= p->u.rValue ) return; break; - case RTREE_LT: if( xN < p->u.rValue ) return; break; - case RTREE_GE: if( xN >= p->u.rValue ) return; break; - case RTREE_GT: if( xN > p->u.rValue ) return; break; - default: if( xN == p->u.rValue ) return; break; + /* Limit the length of the LIKE or GLOB pattern to avoid problems + ** of deep recursion and N*N behavior in patternCompare(). + */ + if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ + sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); + return; } - *peWithin = NOT_WITHIN; -} -/* -** One of the cells in node pNode is guaranteed to have a 64-bit -** integer value equal to iRowid. Return the index of this cell. -*/ -static int nodeRowidIndex( - Rtree *pRtree, - RtreeNode *pNode, - i64 iRowid, - int *piIndex -){ - int ii; - int nCell = NCELL(pNode); - assert( nCell<200 ); - for(ii=0; ii<nCell; ii++){ - if( nodeGetRowid(pRtree, pNode, ii)==iRowid ){ - *piIndex = ii; - return SQLITE_OK; + + if( argc==3 ){ + /* The escape character string must consist of a single UTF-8 character. + ** Otherwise, return an error. + */ + int nE= sqlite3_value_bytes(argv[2]); + const unsigned char *zE = sqlite3_value_text(argv[2]); + int i = 0; + if( zE==0 ) return; + U8_NEXT(zE, i, nE, uEsc); + if( i!=nE){ + sqlite3_result_error(context, + "ESCAPE expression must be a single character", -1); + return; } } - return SQLITE_CORRUPT_VTAB; -} -/* -** Return the index of the cell containing a pointer to node pNode -** in its parent. If pNode is the root node, return -1. -*/ -static int nodeParentIndex(Rtree *pRtree, RtreeNode *pNode, int *piIndex){ - RtreeNode *pParent = pNode->pParent; - if( pParent ){ - return nodeRowidIndex(pRtree, pParent, pNode->iNode, piIndex); + if( zA && zB ){ + sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); } - *piIndex = -1; - return SQLITE_OK; } /* -** Compare two search points. Return negative, zero, or positive if the first -** is less than, equal to, or greater than the second. +** This function is called when an ICU function called from within +** the implementation of an SQL scalar function returns an error. ** -** The rScore is the primary key. Smaller rScore values come first. -** If the rScore is a tie, then use iLevel as the tie breaker with smaller -** iLevel values coming first. In this way, if rScore is the same for all -** SearchPoints, then iLevel becomes the deciding factor and the result -** is a depth-first search, which is the desired default behavior. +** The scalar function context passed as the first argument is +** loaded with an error message based on the following two args. */ -static int rtreeSearchPointCompare( - const RtreeSearchPoint *pA, - const RtreeSearchPoint *pB +static void icuFunctionError( + sqlite3_context *pCtx, /* SQLite scalar function context */ + const char *zName, /* Name of ICU function that failed */ + UErrorCode e /* Error code returned by ICU function */ ){ - if( pA->rScore<pB->rScore ) return -1; - if( pA->rScore>pB->rScore ) return +1; - if( pA->iLevel<pB->iLevel ) return -1; - if( pA->iLevel>pB->iLevel ) return +1; - return 0; + char zBuf[128]; + sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); + zBuf[127] = '\0'; + sqlite3_result_error(pCtx, zBuf, -1); } /* -** Interchange to search points in a cursor. +** Function to delete compiled regexp objects. Registered as +** a destructor function with sqlite3_set_auxdata(). */ -static void rtreeSearchPointSwap(RtreeCursor *p, int i, int j){ - RtreeSearchPoint t = p->aPoint[i]; - assert( i<j ); - p->aPoint[i] = p->aPoint[j]; - p->aPoint[j] = t; - i++; j++; - if( i<RTREE_CACHE_SZ ){ - if( j>=RTREE_CACHE_SZ ){ - nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); - p->aNode[i] = 0; +static void icuRegexpDelete(void *p){ + URegularExpression *pExpr = (URegularExpression *)p; + uregex_close(pExpr); +} + +/* +** Implementation of SQLite REGEXP operator. This scalar function takes +** two arguments. The first is a regular expression pattern to compile +** the second is a string to match against that pattern. If either +** argument is an SQL NULL, then NULL Is returned. Otherwise, the result +** is 1 if the string matches the pattern, or 0 otherwise. +** +** SQLite maps the regexp() function to the regexp() operator such +** that the following two are equivalent: +** +** zString REGEXP zPattern +** regexp(zPattern, zString) +** +** Uses the following ICU regexp APIs: +** +** uregex_open() +** uregex_matches() +** uregex_close() +*/ +static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + UErrorCode status = U_ZERO_ERROR; + URegularExpression *pExpr; + UBool res; + const UChar *zString = sqlite3_value_text16(apArg[1]); + + (void)nArg; /* Unused parameter */ + + /* If the left hand side of the regexp operator is NULL, + ** then the result is also NULL. + */ + if( !zString ){ + return; + } + + pExpr = sqlite3_get_auxdata(p, 0); + if( !pExpr ){ + const UChar *zPattern = sqlite3_value_text16(apArg[0]); + if( !zPattern ){ + return; + } + pExpr = uregex_open(zPattern, -1, 0, 0, &status); + + if( U_SUCCESS(status) ){ + sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); }else{ - RtreeNode *pTemp = p->aNode[i]; - p->aNode[i] = p->aNode[j]; - p->aNode[j] = pTemp; + assert(!pExpr); + icuFunctionError(p, "uregex_open", status); + return; } } + + /* Configure the text that the regular expression operates on. */ + uregex_setText(pExpr, zString, -1, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_setText", status); + return; + } + + /* Attempt the match */ + res = uregex_matches(pExpr, 0, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "uregex_matches", status); + return; + } + + /* Set the text that the regular expression operates on to a NULL + ** pointer. This is not really necessary, but it is tidier than + ** leaving the regular expression object configured with an invalid + ** pointer after this function returns. + */ + uregex_setText(pExpr, 0, 0, &status); + + /* Return 1 or 0. */ + sqlite3_result_int(p, res ? 1 : 0); } /* -** Return the search point with the lowest current score. +** Implementations of scalar functions for case mapping - upper() and +** lower(). Function upper() converts its input to upper-case (ABC). +** Function lower() converts to lower-case (abc). +** +** ICU provides two types of case mapping, "general" case mapping and +** "language specific". Refer to ICU documentation for the differences +** between the two. +** +** To utilise "general" case mapping, the upper() or lower() scalar +** functions are invoked with one argument: +** +** upper('ABC') -> 'abc' +** lower('abc') -> 'ABC' +** +** To access ICU "language specific" case mapping, upper() or lower() +** should be invoked with two arguments. The second argument is the name +** of the locale to use. Passing an empty string ("") or SQL NULL value +** as the second argument is the same as invoking the 1 argument version +** of upper() or lower(). +** +** lower('I', 'en_us') -> 'i' +** lower('I', 'tr_tr') -> 'ı' (small dotless i) +** +** http://www.icu-project.org/userguide/posix.html#case_mappings */ -static RtreeSearchPoint *rtreeSearchPointFirst(RtreeCursor *pCur){ - return pCur->bPoint ? &pCur->sPoint : pCur->nPoint ? pCur->aPoint : 0; +static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ + const UChar *zInput; + UChar *zOutput; + int nInput; + int nOutput; + + UErrorCode status = U_ZERO_ERROR; + const char *zLocale = 0; + + assert(nArg==1 || nArg==2); + if( nArg==2 ){ + zLocale = (const char *)sqlite3_value_text(apArg[1]); + } + + zInput = sqlite3_value_text16(apArg[0]); + if( !zInput ){ + return; + } + nInput = sqlite3_value_bytes16(apArg[0]); + + nOutput = nInput * 2 + 2; + zOutput = sqlite3_malloc(nOutput); + if( !zOutput ){ + return; + } + + if( sqlite3_user_data(p) ){ + u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + }else{ + u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + } + + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "u_strToLower()/u_strToUpper", status); + return; + } + + sqlite3_result_text16(p, zOutput, -1, xFree); } /* -** Get the RtreeNode for the search point with the lowest score. +** Collation sequence destructor function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). */ -static RtreeNode *rtreeNodeOfFirstSearchPoint(RtreeCursor *pCur, int *pRC){ - sqlite3_int64 id; - int ii = 1 - pCur->bPoint; - assert( ii==0 || ii==1 ); - assert( pCur->bPoint || pCur->nPoint ); - if( pCur->aNode[ii]==0 ){ - assert( pRC!=0 ); - id = ii ? pCur->aPoint[0].id : pCur->sPoint.id; - *pRC = nodeAcquire(RTREE_OF_CURSOR(pCur), id, 0, &pCur->aNode[ii]); - } - return pCur->aNode[ii]; +static void icuCollationDel(void *pCtx){ + UCollator *p = (UCollator *)pCtx; + ucol_close(p); } /* -** Push a new element onto the priority queue +** Collation sequence comparison function. The pCtx argument points to +** a UCollator structure previously allocated using ucol_open(). */ -static RtreeSearchPoint *rtreeEnqueue( - RtreeCursor *pCur, /* The cursor */ - RtreeDValue rScore, /* Score for the new search point */ - u8 iLevel /* Level for the new search point */ +static int icuCollationColl( + void *pCtx, + int nLeft, + const void *zLeft, + int nRight, + const void *zRight ){ - int i, j; - RtreeSearchPoint *pNew; - if( pCur->nPoint>=pCur->nPointAlloc ){ - int nNew = pCur->nPointAlloc*2 + 8; - pNew = sqlite3_realloc(pCur->aPoint, nNew*sizeof(pCur->aPoint[0])); - if( pNew==0 ) return 0; - pCur->aPoint = pNew; - pCur->nPointAlloc = nNew; - } - i = pCur->nPoint++; - pNew = pCur->aPoint + i; - pNew->rScore = rScore; - pNew->iLevel = iLevel; - assert( iLevel<=RTREE_MAX_DEPTH ); - while( i>0 ){ - RtreeSearchPoint *pParent; - j = (i-1)/2; - pParent = pCur->aPoint + j; - if( rtreeSearchPointCompare(pNew, pParent)>=0 ) break; - rtreeSearchPointSwap(pCur, j, i); - i = j; - pNew = pParent; + UCollationResult res; + UCollator *p = (UCollator *)pCtx; + res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); + switch( res ){ + case UCOL_LESS: return -1; + case UCOL_GREATER: return +1; + case UCOL_EQUAL: return 0; } - return pNew; + assert(!"Unexpected return value from ucol_strcoll()"); + return 0; } /* -** Allocate a new RtreeSearchPoint and return a pointer to it. Return -** NULL if malloc fails. +** Implementation of the scalar function icu_load_collation(). +** +** This scalar function is used to add ICU collation based collation +** types to an SQLite database connection. It is intended to be called +** as follows: +** +** SELECT icu_load_collation(<locale>, <collation-name>); +** +** Where <locale> is a string containing an ICU locale identifier (i.e. +** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the +** collation sequence to create. */ -static RtreeSearchPoint *rtreeSearchPointNew( - RtreeCursor *pCur, /* The cursor */ - RtreeDValue rScore, /* Score for the new search point */ - u8 iLevel /* Level for the new search point */ +static void icuLoadCollation( + sqlite3_context *p, + int nArg, + sqlite3_value **apArg ){ - RtreeSearchPoint *pNew, *pFirst; - pFirst = rtreeSearchPointFirst(pCur); - pCur->anQueue[iLevel]++; - if( pFirst==0 - || pFirst->rScore>rScore - || (pFirst->rScore==rScore && pFirst->iLevel>iLevel) - ){ - if( pCur->bPoint ){ - int ii; - pNew = rtreeEnqueue(pCur, rScore, iLevel); - if( pNew==0 ) return 0; - ii = (int)(pNew - pCur->aPoint) + 1; - if( ii<RTREE_CACHE_SZ ){ - assert( pCur->aNode[ii]==0 ); - pCur->aNode[ii] = pCur->aNode[0]; - }else{ - nodeRelease(RTREE_OF_CURSOR(pCur), pCur->aNode[0]); - } - pCur->aNode[0] = 0; - *pNew = pCur->sPoint; - } - pCur->sPoint.rScore = rScore; - pCur->sPoint.iLevel = iLevel; - pCur->bPoint = 1; - return &pCur->sPoint; - }else{ - return rtreeEnqueue(pCur, rScore, iLevel); - } -} + sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); + UErrorCode status = U_ZERO_ERROR; + const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ + const char *zName; /* SQL Collation sequence name (eg. "japanese") */ + UCollator *pUCollator; /* ICU library collation object */ + int rc; /* Return code from sqlite3_create_collation_x() */ -#if 0 -/* Tracing routines for the RtreeSearchPoint queue */ -static void tracePoint(RtreeSearchPoint *p, int idx, RtreeCursor *pCur){ - if( idx<0 ){ printf(" s"); }else{ printf("%2d", idx); } - printf(" %d.%05lld.%02d %g %d", - p->iLevel, p->id, p->iCell, p->rScore, p->eWithin - ); - idx++; - if( idx<RTREE_CACHE_SZ ){ - printf(" %p\n", pCur->aNode[idx]); - }else{ - printf("\n"); - } -} -static void traceQueue(RtreeCursor *pCur, const char *zPrefix){ - int ii; - printf("=== %9s ", zPrefix); - if( pCur->bPoint ){ - tracePoint(&pCur->sPoint, -1, pCur); - } - for(ii=0; ii<pCur->nPoint; ii++){ - if( ii>0 || pCur->bPoint ) printf(" "); - tracePoint(&pCur->aPoint[ii], ii, pCur); + assert(nArg==2); + (void)nArg; /* Unused parameter */ + zLocale = (const char *)sqlite3_value_text(apArg[0]); + zName = (const char *)sqlite3_value_text(apArg[1]); + + if( !zLocale || !zName ){ + return; } -} -# define RTREE_QUEUE_TRACE(A,B) traceQueue(A,B) -#else -# define RTREE_QUEUE_TRACE(A,B) /* no-op */ -#endif -/* Remove the search point with the lowest current score. -*/ -static void rtreeSearchPointPop(RtreeCursor *p){ - int i, j, k, n; - i = 1 - p->bPoint; - assert( i==0 || i==1 ); - if( p->aNode[i] ){ - nodeRelease(RTREE_OF_CURSOR(p), p->aNode[i]); - p->aNode[i] = 0; + pUCollator = ucol_open(zLocale, &status); + if( !U_SUCCESS(status) ){ + icuFunctionError(p, "ucol_open", status); + return; } - if( p->bPoint ){ - p->anQueue[p->sPoint.iLevel]--; - p->bPoint = 0; - }else if( p->nPoint ){ - p->anQueue[p->aPoint[0].iLevel]--; - n = --p->nPoint; - p->aPoint[0] = p->aPoint[n]; - if( n<RTREE_CACHE_SZ-1 ){ - p->aNode[1] = p->aNode[n+1]; - p->aNode[n+1] = 0; - } - i = 0; - while( (j = i*2+1)<n ){ - k = j+1; - if( k<n && rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[j])<0 ){ - if( rtreeSearchPointCompare(&p->aPoint[k], &p->aPoint[i])<0 ){ - rtreeSearchPointSwap(p, i, k); - i = k; - }else{ - break; - } - }else{ - if( rtreeSearchPointCompare(&p->aPoint[j], &p->aPoint[i])<0 ){ - rtreeSearchPointSwap(p, i, j); - i = j; - }else{ - break; - } - } - } + assert(p); + + rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, + icuCollationColl, icuCollationDel + ); + if( rc!=SQLITE_OK ){ + ucol_close(pUCollator); + sqlite3_result_error(p, "Error registering collation function", -1); } } - /* -** Continue the search on cursor pCur until the front of the queue -** contains an entry suitable for returning as a result-set row, -** or until the RtreeSearchPoint queue is empty, indicating that the -** query has completed. +** Register the ICU extension functions with database db. */ -static int rtreeStepToLeaf(RtreeCursor *pCur){ - RtreeSearchPoint *p; - Rtree *pRtree = RTREE_OF_CURSOR(pCur); - RtreeNode *pNode; - int eWithin; - int rc = SQLITE_OK; - int nCell; - int nConstraint = pCur->nConstraint; - int ii; - int eInt; - RtreeSearchPoint x; +SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ + struct IcuScalar { + const char *zName; /* Function name */ + int nArg; /* Number of arguments */ + int enc; /* Optimal text encoding */ + void *pContext; /* sqlite3_user_data() context */ + void (*xFunc)(sqlite3_context*,int,sqlite3_value**); + } scalars[] = { + {"regexp", 2, SQLITE_ANY, 0, icuRegexpFunc}, - eInt = pRtree->eCoordType==RTREE_COORD_INT32; - while( (p = rtreeSearchPointFirst(pCur))!=0 && p->iLevel>0 ){ - pNode = rtreeNodeOfFirstSearchPoint(pCur, &rc); - if( rc ) return rc; - nCell = NCELL(pNode); - assert( nCell<200 ); - while( p->iCell<nCell ){ - sqlite3_rtree_dbl rScore = (sqlite3_rtree_dbl)-1; - u8 *pCellData = pNode->zData + (4+pRtree->nBytesPerCell*p->iCell); - eWithin = FULLY_WITHIN; - for(ii=0; ii<nConstraint; ii++){ - RtreeConstraint *pConstraint = pCur->aConstraint + ii; - if( pConstraint->op>=RTREE_MATCH ){ - rc = rtreeCallbackConstraint(pConstraint, eInt, pCellData, p, - &rScore, &eWithin); - if( rc ) return rc; - }else if( p->iLevel==1 ){ - rtreeLeafConstraint(pConstraint, eInt, pCellData, &eWithin); - }else{ - rtreeNonleafConstraint(pConstraint, eInt, pCellData, &eWithin); - } - if( eWithin==NOT_WITHIN ) break; - } - p->iCell++; - if( eWithin==NOT_WITHIN ) continue; - x.iLevel = p->iLevel - 1; - if( x.iLevel ){ - x.id = readInt64(pCellData); - x.iCell = 0; - }else{ - x.id = p->id; - x.iCell = p->iCell - 1; - } - if( p->iCell>=nCell ){ - RTREE_QUEUE_TRACE(pCur, "POP-S:"); - rtreeSearchPointPop(pCur); - } - if( rScore<RTREE_ZERO ) rScore = RTREE_ZERO; - p = rtreeSearchPointNew(pCur, rScore, x.iLevel); - if( p==0 ) return SQLITE_NOMEM; - p->eWithin = eWithin; - p->id = x.id; - p->iCell = x.iCell; - RTREE_QUEUE_TRACE(pCur, "PUSH-S:"); - break; - } - if( p->iCell>=nCell ){ - RTREE_QUEUE_TRACE(pCur, "POP-Se:"); - rtreeSearchPointPop(pCur); - } - } - pCur->atEOF = p==0; - return SQLITE_OK; -} + {"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16}, -/* -** Rtree virtual table module xNext method. -*/ -static int rtreeNext(sqlite3_vtab_cursor *pVtabCursor){ - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - int rc = SQLITE_OK; + {"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16}, + {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, + {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, + {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, - /* Move to the next entry that matches the configured constraints. */ - RTREE_QUEUE_TRACE(pCsr, "POP-Nx:"); - rtreeSearchPointPop(pCsr); - rc = rtreeStepToLeaf(pCsr); - return rc; -} + {"like", 2, SQLITE_UTF8, 0, icuLikeFunc}, + {"like", 3, SQLITE_UTF8, 0, icuLikeFunc}, -/* -** Rtree virtual table module xRowid method. -*/ -static int rtreeRowid(sqlite3_vtab_cursor *pVtabCursor, sqlite_int64 *pRowid){ - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); - int rc = SQLITE_OK; - RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); - if( rc==SQLITE_OK && p ){ - *pRowid = nodeGetRowid(RTREE_OF_CURSOR(pCsr), pNode, p->iCell); - } - return rc; -} + {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, + }; -/* -** Rtree virtual table module xColumn method. -*/ -static int rtreeColumn(sqlite3_vtab_cursor *cur, sqlite3_context *ctx, int i){ - Rtree *pRtree = (Rtree *)cur->pVtab; - RtreeCursor *pCsr = (RtreeCursor *)cur; - RtreeSearchPoint *p = rtreeSearchPointFirst(pCsr); - RtreeCoord c; int rc = SQLITE_OK; - RtreeNode *pNode = rtreeNodeOfFirstSearchPoint(pCsr, &rc); + int i; - if( rc ) return rc; - if( p==0 ) return SQLITE_OK; - if( i==0 ){ - sqlite3_result_int64(ctx, nodeGetRowid(pRtree, pNode, p->iCell)); - }else{ - if( rc ) return rc; - nodeGetCoord(pRtree, pNode, p->iCell, i-1, &c); -#ifndef SQLITE_RTREE_INT_ONLY - if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ - sqlite3_result_double(ctx, c.f); - }else -#endif - { - assert( pRtree->eCoordType==RTREE_COORD_INT32 ); - sqlite3_result_int(ctx, c.i); - } + for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ + struct IcuScalar *p = &scalars[i]; + rc = sqlite3_create_function( + db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0 + ); } - return SQLITE_OK; + + return rc; } -/* -** Use nodeAcquire() to obtain the leaf node containing the record with -** rowid iRowid. If successful, set *ppLeaf to point to the node and -** return SQLITE_OK. If there is no such record in the table, set -** *ppLeaf to 0 and return SQLITE_OK. If an error occurs, set *ppLeaf -** to zero and return an SQLite error code. -*/ -static int findLeafNode( - Rtree *pRtree, /* RTree to search */ - i64 iRowid, /* The rowid searching for */ - RtreeNode **ppLeaf, /* Write the node here */ - sqlite3_int64 *piNode /* Write the node-id here */ +#if !SQLITE_CORE +#ifdef _WIN32 +__declspec(dllexport) +#endif +SQLITE_API int SQLITE_STDCALL sqlite3_icu_init( + sqlite3 *db, + char **pzErrMsg, + const sqlite3_api_routines *pApi ){ - int rc; - *ppLeaf = 0; - sqlite3_bind_int64(pRtree->pReadRowid, 1, iRowid); - if( sqlite3_step(pRtree->pReadRowid)==SQLITE_ROW ){ - i64 iNode = sqlite3_column_int64(pRtree->pReadRowid, 0); - if( piNode ) *piNode = iNode; - rc = nodeAcquire(pRtree, iNode, 0, ppLeaf); - sqlite3_reset(pRtree->pReadRowid); - }else{ - rc = sqlite3_reset(pRtree->pReadRowid); - } - return rc; + SQLITE_EXTENSION_INIT2(pApi) + return sqlite3IcuInit(db); } +#endif + +#endif +/************** End of icu.c *************************************************/ +/************** Begin file fts3_icu.c ****************************************/ /* -** This function is called to configure the RtreeConstraint object passed -** as the second argument for a MATCH constraint. The value passed as the -** first argument to this function is the right-hand operand to the MATCH -** operator. +** 2007 June 22 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file implements a tokenizer for fts3 based on the ICU library. */ -static int deserializeGeometry(sqlite3_value *pValue, RtreeConstraint *pCons){ - RtreeMatchArg *pBlob; /* BLOB returned by geometry function */ - sqlite3_rtree_query_info *pInfo; /* Callback information */ - int nBlob; /* Size of the geometry function blob */ - int nExpected; /* Expected size of the BLOB */ +/* #include "fts3Int.h" */ +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) +#ifdef SQLITE_ENABLE_ICU - /* Check that value is actually a blob. */ - if( sqlite3_value_type(pValue)!=SQLITE_BLOB ) return SQLITE_ERROR; +/* #include <assert.h> */ +/* #include <string.h> */ +/* #include "fts3_tokenizer.h" */ - /* Check that the blob is roughly the right size. */ - nBlob = sqlite3_value_bytes(pValue); - if( nBlob<(int)sizeof(RtreeMatchArg) - || ((nBlob-sizeof(RtreeMatchArg))%sizeof(RtreeDValue))!=0 - ){ - return SQLITE_ERROR; - } +#include <unicode/ubrk.h> +/* #include <unicode/ucol.h> */ +/* #include <unicode/ustring.h> */ +#include <unicode/utf16.h> - pInfo = (sqlite3_rtree_query_info*)sqlite3_malloc( sizeof(*pInfo)+nBlob ); - if( !pInfo ) return SQLITE_NOMEM; - memset(pInfo, 0, sizeof(*pInfo)); - pBlob = (RtreeMatchArg*)&pInfo[1]; +typedef struct IcuTokenizer IcuTokenizer; +typedef struct IcuCursor IcuCursor; - memcpy(pBlob, sqlite3_value_blob(pValue), nBlob); - nExpected = (int)(sizeof(RtreeMatchArg) + - (pBlob->nParam-1)*sizeof(RtreeDValue)); - if( pBlob->magic!=RTREE_GEOMETRY_MAGIC || nBlob!=nExpected ){ - sqlite3_free(pInfo); - return SQLITE_ERROR; - } - pInfo->pContext = pBlob->cb.pContext; - pInfo->nParam = pBlob->nParam; - pInfo->aParam = pBlob->aParam; +struct IcuTokenizer { + sqlite3_tokenizer base; + char *zLocale; +}; - if( pBlob->cb.xGeom ){ - pCons->u.xGeom = pBlob->cb.xGeom; - }else{ - pCons->op = RTREE_QUERY; - pCons->u.xQueryFunc = pBlob->cb.xQueryFunc; - } - pCons->pInfo = pInfo; - return SQLITE_OK; -} +struct IcuCursor { + sqlite3_tokenizer_cursor base; -/* -** Rtree virtual table module xFilter method. + UBreakIterator *pIter; /* ICU break-iterator object */ + int nChar; /* Number of UChar elements in pInput */ + UChar *aChar; /* Copy of input using utf-16 encoding */ + int *aOffset; /* Offsets of each character in utf-8 input */ + + int nBuffer; + char *zBuffer; + + int iToken; +}; + +/* +** Create a new tokenizer instance. */ -static int rtreeFilter( - sqlite3_vtab_cursor *pVtabCursor, - int idxNum, const char *idxStr, - int argc, sqlite3_value **argv +static int icuCreate( + int argc, /* Number of entries in argv[] */ + const char * const *argv, /* Tokenizer creation arguments */ + sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ ){ - Rtree *pRtree = (Rtree *)pVtabCursor->pVtab; - RtreeCursor *pCsr = (RtreeCursor *)pVtabCursor; - RtreeNode *pRoot = 0; - int ii; - int rc = SQLITE_OK; - int iCell = 0; - - rtreeReference(pRtree); + IcuTokenizer *p; + int n = 0; - /* Reset the cursor to the same state as rtreeOpen() leaves it in. */ - freeCursorConstraints(pCsr); - sqlite3_free(pCsr->aPoint); - memset(pCsr, 0, sizeof(RtreeCursor)); - pCsr->base.pVtab = (sqlite3_vtab*)pRtree; + if( argc>0 ){ + n = strlen(argv[0])+1; + } + p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); + if( !p ){ + return SQLITE_NOMEM; + } + memset(p, 0, sizeof(IcuTokenizer)); - pCsr->iStrategy = idxNum; - if( idxNum==1 ){ - /* Special case - lookup by rowid. */ - RtreeNode *pLeaf; /* Leaf on which the required cell resides */ - RtreeSearchPoint *p; /* Search point for the the leaf */ - i64 iRowid = sqlite3_value_int64(argv[0]); - i64 iNode = 0; - rc = findLeafNode(pRtree, iRowid, &pLeaf, &iNode); - if( rc==SQLITE_OK && pLeaf!=0 ){ - p = rtreeSearchPointNew(pCsr, RTREE_ZERO, 0); - assert( p!=0 ); /* Always returns pCsr->sPoint */ - pCsr->aNode[0] = pLeaf; - p->id = iNode; - p->eWithin = PARTLY_WITHIN; - rc = nodeRowidIndex(pRtree, pLeaf, iRowid, &iCell); - p->iCell = iCell; - RTREE_QUEUE_TRACE(pCsr, "PUSH-F1:"); - }else{ - pCsr->atEOF = 1; - } - }else{ - /* Normal case - r-tree scan. Set up the RtreeCursor.aConstraint array - ** with the configured constraints. - */ - rc = nodeAcquire(pRtree, 1, 0, &pRoot); - if( rc==SQLITE_OK && argc>0 ){ - pCsr->aConstraint = sqlite3_malloc(sizeof(RtreeConstraint)*argc); - pCsr->nConstraint = argc; - if( !pCsr->aConstraint ){ - rc = SQLITE_NOMEM; - }else{ - memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc); - memset(pCsr->anQueue, 0, sizeof(u32)*(pRtree->iDepth + 1)); - assert( (idxStr==0 && argc==0) - || (idxStr && (int)strlen(idxStr)==argc*2) ); - for(ii=0; ii<argc; ii++){ - RtreeConstraint *p = &pCsr->aConstraint[ii]; - p->op = idxStr[ii*2]; - p->iCoord = idxStr[ii*2+1]-'0'; - if( p->op>=RTREE_MATCH ){ - /* A MATCH operator. The right-hand-side must be a blob that - ** can be cast into an RtreeMatchArg object. One created using - ** an sqlite3_rtree_geometry_callback() SQL user function. - */ - rc = deserializeGeometry(argv[ii], p); - if( rc!=SQLITE_OK ){ - break; - } - p->pInfo->nCoord = pRtree->nDim*2; - p->pInfo->anQueue = pCsr->anQueue; - p->pInfo->mxLevel = pRtree->iDepth + 1; - }else{ -#ifdef SQLITE_RTREE_INT_ONLY - p->u.rValue = sqlite3_value_int64(argv[ii]); -#else - p->u.rValue = sqlite3_value_double(argv[ii]); -#endif - } - } - } - } - if( rc==SQLITE_OK ){ - RtreeSearchPoint *pNew; - pNew = rtreeSearchPointNew(pCsr, RTREE_ZERO, pRtree->iDepth+1); - if( pNew==0 ) return SQLITE_NOMEM; - pNew->id = 1; - pNew->iCell = 0; - pNew->eWithin = PARTLY_WITHIN; - assert( pCsr->bPoint==1 ); - pCsr->aNode[0] = pRoot; - pRoot = 0; - RTREE_QUEUE_TRACE(pCsr, "PUSH-Fm:"); - rc = rtreeStepToLeaf(pCsr); - } + if( n ){ + p->zLocale = (char *)&p[1]; + memcpy(p->zLocale, argv[0], n); } - nodeRelease(pRtree, pRoot); - rtreeRelease(pRtree); - return rc; + *ppTokenizer = (sqlite3_tokenizer *)p; + + return SQLITE_OK; } /* -** Set the pIdxInfo->estimatedRows variable to nRow. Unless this -** extension is currently being used by a version of SQLite too old to -** support estimatedRows. In that case this function is a no-op. +** Destroy a tokenizer */ -static void setEstimatedRows(sqlite3_index_info *pIdxInfo, i64 nRow){ -#if SQLITE_VERSION_NUMBER>=3008002 - if( sqlite3_libversion_number()>=3008002 ){ - pIdxInfo->estimatedRows = nRow; - } -#endif +static int icuDestroy(sqlite3_tokenizer *pTokenizer){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + sqlite3_free(p); + return SQLITE_OK; } /* -** Rtree virtual table module xBestIndex method. There are three -** table scan strategies to choose from (in order from most to -** least desirable): -** -** idxNum idxStr Strategy -** ------------------------------------------------ -** 1 Unused Direct lookup by rowid. -** 2 See below R-tree query or full-table scan. -** ------------------------------------------------ -** -** If strategy 1 is used, then idxStr is not meaningful. If strategy -** 2 is used, idxStr is formatted to contain 2 bytes for each -** constraint used. The first two bytes of idxStr correspond to -** the constraint in sqlite3_index_info.aConstraintUsage[] with -** (argvIndex==1) etc. -** -** The first of each pair of bytes in idxStr identifies the constraint -** operator as follows: -** -** Operator Byte Value -** ---------------------- -** = 0x41 ('A') -** <= 0x42 ('B') -** < 0x43 ('C') -** >= 0x44 ('D') -** > 0x45 ('E') -** MATCH 0x46 ('F') -** ---------------------- -** -** The second of each pair of bytes identifies the coordinate column -** to which the constraint applies. The leftmost coordinate column -** is 'a', the second from the left 'b' etc. +** Prepare to begin tokenizing a particular string. The input +** string to be tokenized is pInput[0..nBytes-1]. A cursor +** used to incrementally tokenize this string is returned in +** *ppCursor. */ -static int rtreeBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ - Rtree *pRtree = (Rtree*)tab; - int rc = SQLITE_OK; - int ii; - i64 nRow; /* Estimated rows returned by this scan */ +static int icuOpen( + sqlite3_tokenizer *pTokenizer, /* The tokenizer */ + const char *zInput, /* Input string */ + int nInput, /* Length of zInput in bytes */ + sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ +){ + IcuTokenizer *p = (IcuTokenizer *)pTokenizer; + IcuCursor *pCsr; - int iIdx = 0; - char zIdxStr[RTREE_MAX_DIMENSIONS*8+1]; - memset(zIdxStr, 0, sizeof(zIdxStr)); + const int32_t opt = U_FOLD_CASE_DEFAULT; + UErrorCode status = U_ZERO_ERROR; + int nChar; - assert( pIdxInfo->idxStr==0 ); - for(ii=0; ii<pIdxInfo->nConstraint && iIdx<(int)(sizeof(zIdxStr)-1); ii++){ - struct sqlite3_index_constraint *p = &pIdxInfo->aConstraint[ii]; + UChar32 c; + int iInput = 0; + int iOut = 0; - if( p->usable && p->iColumn==0 && p->op==SQLITE_INDEX_CONSTRAINT_EQ ){ - /* We have an equality constraint on the rowid. Use strategy 1. */ - int jj; - for(jj=0; jj<ii; jj++){ - pIdxInfo->aConstraintUsage[jj].argvIndex = 0; - pIdxInfo->aConstraintUsage[jj].omit = 0; - } - pIdxInfo->idxNum = 1; - pIdxInfo->aConstraintUsage[ii].argvIndex = 1; - pIdxInfo->aConstraintUsage[jj].omit = 1; + *ppCursor = 0; - /* This strategy involves a two rowid lookups on an B-Tree structures - ** and then a linear search of an R-Tree node. This should be - ** considered almost as quick as a direct rowid lookup (for which - ** sqlite uses an internal cost of 0.0). It is expected to return - ** a single row. - */ - pIdxInfo->estimatedCost = 30.0; - setEstimatedRows(pIdxInfo, 1); - return SQLITE_OK; + if( zInput==0 ){ + nInput = 0; + zInput = ""; + }else if( nInput<0 ){ + nInput = strlen(zInput); + } + nChar = nInput+1; + pCsr = (IcuCursor *)sqlite3_malloc( + sizeof(IcuCursor) + /* IcuCursor */ + ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ + (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ + ); + if( !pCsr ){ + return SQLITE_NOMEM; + } + memset(pCsr, 0, sizeof(IcuCursor)); + pCsr->aChar = (UChar *)&pCsr[1]; + pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; + + pCsr->aOffset[iOut] = iInput; + U8_NEXT(zInput, iInput, nInput, c); + while( c>0 ){ + int isError = 0; + c = u_foldCase(c, opt); + U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); + if( isError ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; } + pCsr->aOffset[iOut] = iInput; - if( p->usable && (p->iColumn>0 || p->op==SQLITE_INDEX_CONSTRAINT_MATCH) ){ - u8 op; - switch( p->op ){ - case SQLITE_INDEX_CONSTRAINT_EQ: op = RTREE_EQ; break; - case SQLITE_INDEX_CONSTRAINT_GT: op = RTREE_GT; break; - case SQLITE_INDEX_CONSTRAINT_LE: op = RTREE_LE; break; - case SQLITE_INDEX_CONSTRAINT_LT: op = RTREE_LT; break; - case SQLITE_INDEX_CONSTRAINT_GE: op = RTREE_GE; break; - default: - assert( p->op==SQLITE_INDEX_CONSTRAINT_MATCH ); - op = RTREE_MATCH; - break; - } - zIdxStr[iIdx++] = op; - zIdxStr[iIdx++] = p->iColumn - 1 + '0'; - pIdxInfo->aConstraintUsage[ii].argvIndex = (iIdx/2); - pIdxInfo->aConstraintUsage[ii].omit = 1; + if( iInput<nInput ){ + U8_NEXT(zInput, iInput, nInput, c); + }else{ + c = 0; } } - pIdxInfo->idxNum = 2; - pIdxInfo->needToFreeIdxStr = 1; - if( iIdx>0 && 0==(pIdxInfo->idxStr = sqlite3_mprintf("%s", zIdxStr)) ){ - return SQLITE_NOMEM; + pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); + if( !U_SUCCESS(status) ){ + sqlite3_free(pCsr); + return SQLITE_ERROR; } + pCsr->nChar = iOut; - nRow = pRtree->nRowEst / (iIdx + 1); - pIdxInfo->estimatedCost = (double)6.0 * (double)nRow; - setEstimatedRows(pIdxInfo, nRow); - - return rc; + ubrk_first(pCsr->pIter); + *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; + return SQLITE_OK; } /* -** Return the N-dimensional volumn of the cell stored in *p. +** Close a tokenization cursor previously opened by a call to icuOpen(). */ -static RtreeDValue cellArea(Rtree *pRtree, RtreeCell *p){ - RtreeDValue area = (RtreeDValue)1; - int ii; - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - area = (area * (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii]))); - } - return area; +static int icuClose(sqlite3_tokenizer_cursor *pCursor){ + IcuCursor *pCsr = (IcuCursor *)pCursor; + ubrk_close(pCsr->pIter); + sqlite3_free(pCsr->zBuffer); + sqlite3_free(pCsr); + return SQLITE_OK; } /* -** Return the margin length of cell p. The margin length is the sum -** of the objects size in each dimension. +** Extract the next token from a tokenization cursor. */ -static RtreeDValue cellMargin(Rtree *pRtree, RtreeCell *p){ - RtreeDValue margin = (RtreeDValue)0; - int ii; - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - margin += (DCOORD(p->aCoord[ii+1]) - DCOORD(p->aCoord[ii])); - } - return margin; -} +static int icuNext( + sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ + const char **ppToken, /* OUT: *ppToken is the token text */ + int *pnBytes, /* OUT: Number of bytes in token */ + int *piStartOffset, /* OUT: Starting offset of token */ + int *piEndOffset, /* OUT: Ending offset of token */ + int *piPosition /* OUT: Position integer of token */ +){ + IcuCursor *pCsr = (IcuCursor *)pCursor; -/* -** Store the union of cells p1 and p2 in p1. -*/ -static void cellUnion(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ - int ii; - if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - p1->aCoord[ii].f = MIN(p1->aCoord[ii].f, p2->aCoord[ii].f); - p1->aCoord[ii+1].f = MAX(p1->aCoord[ii+1].f, p2->aCoord[ii+1].f); + int iStart = 0; + int iEnd = 0; + int nByte = 0; + + while( iStart==iEnd ){ + UChar32 c; + + iStart = ubrk_current(pCsr->pIter); + iEnd = ubrk_next(pCsr->pIter); + if( iEnd==UBRK_DONE ){ + return SQLITE_DONE; } - }else{ - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - p1->aCoord[ii].i = MIN(p1->aCoord[ii].i, p2->aCoord[ii].i); - p1->aCoord[ii+1].i = MAX(p1->aCoord[ii+1].i, p2->aCoord[ii+1].i); + + while( iStart<iEnd ){ + int iWhite = iStart; + U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); + if( u_isspace(c) ){ + iStart = iWhite; + }else{ + break; + } } + assert(iStart<=iEnd); } -} -/* -** Return true if the area covered by p2 is a subset of the area covered -** by p1. False otherwise. -*/ -static int cellContains(Rtree *pRtree, RtreeCell *p1, RtreeCell *p2){ - int ii; - int isInt = (pRtree->eCoordType==RTREE_COORD_INT32); - for(ii=0; ii<(pRtree->nDim*2); ii+=2){ - RtreeCoord *a1 = &p1->aCoord[ii]; - RtreeCoord *a2 = &p2->aCoord[ii]; - if( (!isInt && (a2[0].f<a1[0].f || a2[1].f>a1[1].f)) - || ( isInt && (a2[0].i<a1[0].i || a2[1].i>a1[1].i)) - ){ - return 0; + do { + UErrorCode status = U_ZERO_ERROR; + if( nByte ){ + char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); + if( !zNew ){ + return SQLITE_NOMEM; + } + pCsr->zBuffer = zNew; + pCsr->nBuffer = nByte; } - } - return 1; + + u_strToUTF8( + pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ + &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ + &status /* Output success/failure */ + ); + } while( nByte>pCsr->nBuffer ); + + *ppToken = pCsr->zBuffer; + *pnBytes = nByte; + *piStartOffset = pCsr->aOffset[iStart]; + *piEndOffset = pCsr->aOffset[iEnd]; + *piPosition = pCsr->iToken++; + + return SQLITE_OK; } /* -** Return the amount cell p would grow by if it were unioned with pCell. +** The set of routines that implement the simple tokenizer */ -static RtreeDValue cellGrowth(Rtree *pRtree, RtreeCell *p, RtreeCell *pCell){ - RtreeDValue area; - RtreeCell cell; - memcpy(&cell, p, sizeof(RtreeCell)); - area = cellArea(pRtree, &cell); - cellUnion(pRtree, &cell, pCell); - return (cellArea(pRtree, &cell)-area); -} +static const sqlite3_tokenizer_module icuTokenizerModule = { + 0, /* iVersion */ + icuCreate, /* xCreate */ + icuDestroy, /* xCreate */ + icuOpen, /* xOpen */ + icuClose, /* xClose */ + icuNext, /* xNext */ + 0, /* xLanguageid */ +}; -static RtreeDValue cellOverlap( - Rtree *pRtree, - RtreeCell *p, - RtreeCell *aCell, - int nCell +/* +** Set *ppModule to point at the implementation of the ICU tokenizer. +*/ +SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( + sqlite3_tokenizer_module const**ppModule ){ - int ii; - RtreeDValue overlap = RTREE_ZERO; - for(ii=0; ii<nCell; ii++){ - int jj; - RtreeDValue o = (RtreeDValue)1; - for(jj=0; jj<(pRtree->nDim*2); jj+=2){ - RtreeDValue x1, x2; - x1 = MAX(DCOORD(p->aCoord[jj]), DCOORD(aCell[ii].aCoord[jj])); - x2 = MIN(DCOORD(p->aCoord[jj+1]), DCOORD(aCell[ii].aCoord[jj+1])); - if( x2<x1 ){ - o = (RtreeDValue)0; - break; - }else{ - o = o * (x2-x1); - } - } - overlap += o; - } - return overlap; + *ppModule = &icuTokenizerModule; } +#endif /* defined(SQLITE_ENABLE_ICU) */ +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ +/************** End of fts3_icu.c ********************************************/ +/************** Begin file sqlite3rbu.c **************************************/ /* -** This function implements the ChooseLeaf algorithm from Gutman[84]. -** ChooseSubTree in r*tree terminology. +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** +** OVERVIEW +** +** The RBU extension requires that the RBU update be packaged as an +** SQLite database. The tables it expects to find are described in +** sqlite3rbu.h. Essentially, for each table xyz in the target database +** that the user wishes to write to, a corresponding data_xyz table is +** created in the RBU database and populated with one row for each row to +** update, insert or delete from the target table. +** +** The update proceeds in three stages: +** +** 1) The database is updated. The modified database pages are written +** to a *-oal file. A *-oal file is just like a *-wal file, except +** that it is named "<database>-oal" instead of "<database>-wal". +** Because regular SQLite clients do not look for file named +** "<database>-oal", they go on using the original database in +** rollback mode while the *-oal file is being generated. +** +** During this stage RBU does not update the database by writing +** directly to the target tables. Instead it creates "imposter" +** tables using the SQLITE_TESTCTRL_IMPOSTER interface that it uses +** to update each b-tree individually. All updates required by each +** b-tree are completed before moving on to the next, and all +** updates are done in sorted key order. +** +** 2) The "<database>-oal" file is moved to the equivalent "<database>-wal" +** location using a call to rename(2). Before doing this the RBU +** module takes an EXCLUSIVE lock on the database file, ensuring +** that there are no other active readers. +** +** Once the EXCLUSIVE lock is released, any other database readers +** detect the new *-wal file and read the database in wal mode. At +** this point they see the new version of the database - including +** the updates made as part of the RBU update. +** +** 3) The new *-wal file is checkpointed. This proceeds in the same way +** as a regular database checkpoint, except that a single frame is +** checkpointed each time sqlite3rbu_step() is called. If the RBU +** handle is closed before the entire *-wal file is checkpointed, +** the checkpoint progress is saved in the RBU database and the +** checkpoint can be resumed by another RBU client at some point in +** the future. +** +** POTENTIAL PROBLEMS +** +** The rename() call might not be portable. And RBU is not currently +** syncing the directory after renaming the file. +** +** When state is saved, any commit to the *-oal file and the commit to +** the RBU update database are not atomic. So if the power fails at the +** wrong moment they might get out of sync. As the main database will be +** committed before the RBU update database this will likely either just +** pass unnoticed, or result in SQLITE_CONSTRAINT errors (due to UNIQUE +** constraint violations). +** +** If some client does modify the target database mid RBU update, or some +** other error occurs, the RBU extension will keep throwing errors. It's +** not really clear how to get out of this state. The system could just +** by delete the RBU update database and *-oal file and have the device +** download the update again and start over. +** +** At present, for an UPDATE, both the new.* and old.* records are +** collected in the rbu_xyz table. And for both UPDATEs and DELETEs all +** fields are collected. This means we're probably writing a lot more +** data to disk when saving the state of an ongoing update to the RBU +** update database than is strictly necessary. +** */ -static int ChooseLeaf( - Rtree *pRtree, /* Rtree table */ - RtreeCell *pCell, /* Cell to insert into rtree */ - int iHeight, /* Height of sub-tree rooted at pCell */ - RtreeNode **ppLeaf /* OUT: Selected leaf page */ -){ - int rc; - int ii; - RtreeNode *pNode; - rc = nodeAcquire(pRtree, 1, 0, &pNode); - for(ii=0; rc==SQLITE_OK && ii<(pRtree->iDepth-iHeight); ii++){ - int iCell; - sqlite3_int64 iBest = 0; +/* #include <assert.h> */ +/* #include <string.h> */ +/* #include <stdio.h> */ - RtreeDValue fMinGrowth = RTREE_ZERO; - RtreeDValue fMinArea = RTREE_ZERO; +#if !defined(_WIN32) +/* # include <unistd.h> */ +#endif - int nCell = NCELL(pNode); - RtreeCell cell; - RtreeNode *pChild; +/* #include "sqlite3.h" */ - RtreeCell *aCell = 0; +#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) +/************** Include sqlite3rbu.h in the middle of sqlite3rbu.c ***********/ +/************** Begin file sqlite3rbu.h **************************************/ +/* +** 2014 August 30 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the public interface for the RBU extension. +*/ - /* Select the child node which will be enlarged the least if pCell - ** is inserted into it. Resolve ties by choosing the entry with - ** the smallest area. - */ - for(iCell=0; iCell<nCell; iCell++){ - int bBest = 0; - RtreeDValue growth; - RtreeDValue area; - nodeGetCell(pRtree, pNode, iCell, &cell); - growth = cellGrowth(pRtree, &cell, pCell); - area = cellArea(pRtree, &cell); - if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){ - bBest = 1; - } - if( bBest ){ - fMinGrowth = growth; - fMinArea = area; - iBest = cell.iRowid; - } - } +/* +** SUMMARY +** +** Writing a transaction containing a large number of operations on +** b-tree indexes that are collectively larger than the available cache +** memory can be very inefficient. +** +** The problem is that in order to update a b-tree, the leaf page (at least) +** containing the entry being inserted or deleted must be modified. If the +** working set of leaves is larger than the available cache memory, then a +** single leaf that is modified more than once as part of the transaction +** may be loaded from or written to the persistent media multiple times. +** Additionally, because the index updates are likely to be applied in +** random order, access to pages within the database is also likely to be in +** random order, which is itself quite inefficient. +** +** One way to improve the situation is to sort the operations on each index +** by index key before applying them to the b-tree. This leads to an IO +** pattern that resembles a single linear scan through the index b-tree, +** and all but guarantees each modified leaf page is loaded and stored +** exactly once. SQLite uses this trick to improve the performance of +** CREATE INDEX commands. This extension allows it to be used to improve +** the performance of large transactions on existing databases. +** +** Additionally, this extension allows the work involved in writing the +** large transaction to be broken down into sub-transactions performed +** sequentially by separate processes. This is useful if the system cannot +** guarantee that a single update process will run for long enough to apply +** the entire update, for example because the update is being applied on a +** mobile device that is frequently rebooted. Even after the writer process +** has committed one or more sub-transactions, other database clients continue +** to read from the original database snapshot. In other words, partially +** applied transactions are not visible to other clients. +** +** "RBU" stands for "Resumable Bulk Update". As in a large database update +** transmitted via a wireless network to a mobile device. A transaction +** applied using this extension is hence refered to as an "RBU update". +** +** +** LIMITATIONS +** +** An "RBU update" transaction is subject to the following limitations: +** +** * The transaction must consist of INSERT, UPDATE and DELETE operations +** only. +** +** * INSERT statements may not use any default values. +** +** * UPDATE and DELETE statements must identify their target rows by +** non-NULL PRIMARY KEY values. Rows with NULL values stored in PRIMARY +** KEY fields may not be updated or deleted. If the table being written +** has no PRIMARY KEY, affected rows must be identified by rowid. +** +** * UPDATE statements may not modify PRIMARY KEY columns. +** +** * No triggers will be fired. +** +** * No foreign key violations are detected or reported. +** +** * CHECK constraints are not enforced. +** +** * No constraint handling mode except for "OR ROLLBACK" is supported. +** +** +** PREPARATION +** +** An "RBU update" is stored as a separate SQLite database. A database +** containing an RBU update is an "RBU database". For each table in the +** target database to be updated, the RBU database should contain a table +** named "data_<target name>" containing the same set of columns as the +** target table, and one more - "rbu_control". The data_% table should +** have no PRIMARY KEY or UNIQUE constraints, but each column should have +** the same type as the corresponding column in the target database. +** The "rbu_control" column should have no type at all. For example, if +** the target database contains: +** +** CREATE TABLE t1(a INTEGER PRIMARY KEY, b TEXT, c UNIQUE); +** +** Then the RBU database should contain: +** +** CREATE TABLE data_t1(a INTEGER, b TEXT, c, rbu_control); +** +** The order of the columns in the data_% table does not matter. +** +** If the target database table is a virtual table or a table that has no +** PRIMARY KEY declaration, the data_% table must also contain a column +** named "rbu_rowid". This column is mapped to the tables implicit primary +** key column - "rowid". Virtual tables for which the "rowid" column does +** not function like a primary key value cannot be updated using RBU. For +** example, if the target db contains either of the following: +** +** CREATE VIRTUAL TABLE x1 USING fts3(a, b); +** CREATE TABLE x1(a, b) +** +** then the RBU database should contain: +** +** CREATE TABLE data_x1(a, b, rbu_rowid, rbu_control); +** +** All non-hidden columns (i.e. all columns matched by "SELECT *") of the +** target table must be present in the input table. For virtual tables, +** hidden columns are optional - they are updated by RBU if present in +** the input table, or not otherwise. For example, to write to an fts4 +** table with a hidden languageid column such as: +** +** CREATE VIRTUAL TABLE ft1 USING fts4(a, b, languageid='langid'); +** +** Either of the following input table schemas may be used: +** +** CREATE TABLE data_ft1(a, b, langid, rbu_rowid, rbu_control); +** CREATE TABLE data_ft1(a, b, rbu_rowid, rbu_control); +** +** For each row to INSERT into the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 0. The +** other columns should be set to the values that make up the new record +** to insert. +** +** If the target database table has an INTEGER PRIMARY KEY, it is not +** possible to insert a NULL value into the IPK column. Attempting to +** do so results in an SQLITE_MISMATCH error. +** +** For each row to DELETE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain integer value 1. The +** real primary key values of the row to delete should be stored in the +** corresponding columns of the data_% table. The values stored in the +** other columns are not used. +** +** For each row to UPDATE from the target database as part of the RBU +** update, the corresponding data_% table should contain a single record +** with the "rbu_control" column set to contain a value of type text. +** The real primary key values identifying the row to update should be +** stored in the corresponding columns of the data_% table row, as should +** the new values of all columns being update. The text value in the +** "rbu_control" column must contain the same number of characters as +** there are columns in the target database table, and must consist entirely +** of 'x' and '.' characters (or in some special cases 'd' - see below). For +** each column that is being updated, the corresponding character is set to +** 'x'. For those that remain as they are, the corresponding character of the +** rbu_control value should be set to '.'. For example, given the tables +** above, the update statement: +** +** UPDATE t1 SET c = 'usa' WHERE a = 4; +** +** is represented by the data_t1 row created by: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..x'); +** +** Instead of an 'x' character, characters of the rbu_control value specified +** for UPDATEs may also be set to 'd'. In this case, instead of updating the +** target table with the value stored in the corresponding data_% column, the +** user-defined SQL function "rbu_delta()" is invoked and the result stored in +** the target table column. rbu_delta() is invoked with two arguments - the +** original value currently stored in the target table column and the +** value specified in the data_xxx table. +** +** For example, this row: +** +** INSERT INTO data_t1(a, b, c, rbu_control) VALUES(4, NULL, 'usa', '..d'); +** +** is similar to an UPDATE statement such as: +** +** UPDATE t1 SET c = rbu_delta(c, 'usa') WHERE a = 4; +** +** If the target database table is a virtual table or a table with no PRIMARY +** KEY, the rbu_control value should not include a character corresponding +** to the rbu_rowid value. For example, this: +** +** INSERT INTO data_ft1(a, b, rbu_rowid, rbu_control) +** VALUES(NULL, 'usa', 12, '.x'); +** +** causes a result similar to: +** +** UPDATE ft1 SET b = 'usa' WHERE rowid = 12; +** +** The data_xxx tables themselves should have no PRIMARY KEY declarations. +** However, RBU is more efficient if reading the rows in from each data_xxx +** table in "rowid" order is roughly the same as reading them sorted by +** the PRIMARY KEY of the corresponding target database table. In other +** words, rows should be sorted using the destination table PRIMARY KEY +** fields before they are inserted into the data_xxx tables. +** +** USAGE +** +** The API declared below allows an application to apply an RBU update +** stored on disk to an existing target database. Essentially, the +** application: +** +** 1) Opens an RBU handle using the sqlite3rbu_open() function. +** +** 2) Registers any required virtual table modules with the database +** handle returned by sqlite3rbu_db(). Also, if required, register +** the rbu_delta() implementation. +** +** 3) Calls the sqlite3rbu_step() function one or more times on +** the new handle. Each call to sqlite3rbu_step() performs a single +** b-tree operation, so thousands of calls may be required to apply +** a complete update. +** +** 4) Calls sqlite3rbu_close() to close the RBU update handle. If +** sqlite3rbu_step() has been called enough times to completely +** apply the update to the target database, then the RBU database +** is marked as fully applied. Otherwise, the state of the RBU +** update application is saved in the RBU database for later +** resumption. +** +** See comments below for more detail on APIs. +** +** If an update is only partially applied to the target database by the +** time sqlite3rbu_close() is called, various state information is saved +** within the RBU database. This allows subsequent processes to automatically +** resume the RBU update from where it left off. +** +** To remove all RBU extension state information, returning an RBU database +** to its original contents, it is sufficient to drop all tables that begin +** with the prefix "rbu_" +** +** DATABASE LOCKING +** +** An RBU update may not be applied to a database in WAL mode. Attempting +** to do so is an error (SQLITE_ERROR). +** +** While an RBU handle is open, a SHARED lock may be held on the target +** database file. This means it is possible for other clients to read the +** database, but not to write it. +** +** If an RBU update is started and then suspended before it is completed, +** then an external client writes to the database, then attempting to resume +** the suspended RBU update is also an error (SQLITE_BUSY). +*/ - sqlite3_free(aCell); - rc = nodeAcquire(pRtree, iBest, pNode, &pChild); - nodeRelease(pRtree, pNode); - pNode = pChild; - } +#ifndef _SQLITE3RBU_H +#define _SQLITE3RBU_H - *ppLeaf = pNode; - return rc; -} +/* #include "sqlite3.h" ** Required for error code definitions ** */ + +typedef struct sqlite3rbu sqlite3rbu; /* -** A cell with the same content as pCell has just been inserted into -** the node pNode. This function updates the bounding box cells in -** all ancestor elements. +** Open an RBU handle. +** +** Argument zTarget is the path to the target database. Argument zRbu is +** the path to the RBU database. Each call to this function must be matched +** by a call to sqlite3rbu_close(). When opening the databases, RBU passes +** the SQLITE_CONFIG_URI flag to sqlite3_open_v2(). So if either zTarget +** or zRbu begin with "file:", it will be interpreted as an SQLite +** database URI, not a regular file name. +** +** If the zState argument is passed a NULL value, the RBU extension stores +** the current state of the update (how many rows have been updated, which +** indexes are yet to be updated etc.) within the RBU database itself. This +** can be convenient, as it means that the RBU application does not need to +** organize removing a separate state file after the update is concluded. +** Or, if zState is non-NULL, it must be a path to a database file in which +** the RBU extension can store the state of the update. +** +** When resuming an RBU update, the zState argument must be passed the same +** value as when the RBU update was started. +** +** Once the RBU update is finished, the RBU extension does not +** automatically remove any zState database file, even if it created it. +** +** By default, RBU uses the default VFS to access the files on disk. To +** use a VFS other than the default, an SQLite "file:" URI containing a +** "vfs=..." option may be passed as the zTarget option. +** +** IMPORTANT NOTE FOR ZIPVFS USERS: The RBU extension works with all of +** SQLite's built-in VFSs, including the multiplexor VFS. However it does +** not work out of the box with zipvfs. Refer to the comment describing +** the zipvfs_create_vfs() API below for details on using RBU with zipvfs. */ -static int AdjustTree( - Rtree *pRtree, /* Rtree table */ - RtreeNode *pNode, /* Adjust ancestry of this node. */ - RtreeCell *pCell /* This cell was just inserted */ -){ - RtreeNode *p = pNode; - while( p->pParent ){ - RtreeNode *pParent = p->pParent; - RtreeCell cell; - int iCell; - - if( nodeParentIndex(pRtree, p, &iCell) ){ - return SQLITE_CORRUPT_VTAB; - } - - nodeGetCell(pRtree, pParent, iCell, &cell); - if( !cellContains(pRtree, &cell, pCell) ){ - cellUnion(pRtree, &cell, pCell); - nodeOverwriteCell(pRtree, pParent, &cell, iCell); - } - - p = pParent; - } - return SQLITE_OK; -} +SQLITE_API sqlite3rbu *SQLITE_STDCALL sqlite3rbu_open( + const char *zTarget, + const char *zRbu, + const char *zState +); /* -** Write mapping (iRowid->iNode) to the <rtree>_rowid table. +** Internally, each RBU connection uses a separate SQLite database +** connection to access the target and rbu update databases. This +** API allows the application direct access to these database handles. +** +** The first argument passed to this function must be a valid, open, RBU +** handle. The second argument should be passed zero to access the target +** database handle, or non-zero to access the rbu update database handle. +** Accessing the underlying database handles may be useful in the +** following scenarios: +** +** * If any target tables are virtual tables, it may be necessary to +** call sqlite3_create_module() on the target database handle to +** register the required virtual table implementations. +** +** * If the data_xxx tables in the RBU source database are virtual +** tables, the application may need to call sqlite3_create_module() on +** the rbu update db handle to any required virtual table +** implementations. +** +** * If the application uses the "rbu_delta()" feature described above, +** it must use sqlite3_create_function() or similar to register the +** rbu_delta() implementation with the target database handle. +** +** If an error has occurred, either while opening or stepping the RBU object, +** this function may return NULL. The error code and message may be collected +** when sqlite3rbu_close() is called. */ -static int rowidWrite(Rtree *pRtree, sqlite3_int64 iRowid, sqlite3_int64 iNode){ - sqlite3_bind_int64(pRtree->pWriteRowid, 1, iRowid); - sqlite3_bind_int64(pRtree->pWriteRowid, 2, iNode); - sqlite3_step(pRtree->pWriteRowid); - return sqlite3_reset(pRtree->pWriteRowid); -} +SQLITE_API sqlite3 *SQLITE_STDCALL sqlite3rbu_db(sqlite3rbu*, int bRbu); /* -** Write mapping (iNode->iPar) to the <rtree>_parent table. +** Do some work towards applying the RBU update to the target db. +** +** Return SQLITE_DONE if the update has been completely applied, or +** SQLITE_OK if no error occurs but there remains work to do to apply +** the RBU update. If an error does occur, some other error code is +** returned. +** +** Once a call to sqlite3rbu_step() has returned a value other than +** SQLITE_OK, all subsequent calls on the same RBU handle are no-ops +** that immediately return the same value. */ -static int parentWrite(Rtree *pRtree, sqlite3_int64 iNode, sqlite3_int64 iPar){ - sqlite3_bind_int64(pRtree->pWriteParent, 1, iNode); - sqlite3_bind_int64(pRtree->pWriteParent, 2, iPar); - sqlite3_step(pRtree->pWriteParent); - return sqlite3_reset(pRtree->pWriteParent); -} +SQLITE_API int SQLITE_STDCALL sqlite3rbu_step(sqlite3rbu *pRbu); -static int rtreeInsertCell(Rtree *, RtreeNode *, RtreeCell *, int); +/* +** Close an RBU handle. +** +** If the RBU update has been completely applied, mark the RBU database +** as fully applied. Otherwise, assuming no error has occurred, save the +** current state of the RBU update appliation to the RBU database. +** +** If an error has already occurred as part of an sqlite3rbu_step() +** or sqlite3rbu_open() call, or if one occurs within this function, an +** SQLite error code is returned. Additionally, *pzErrmsg may be set to +** point to a buffer containing a utf-8 formatted English language error +** message. It is the responsibility of the caller to eventually free any +** such buffer using sqlite3_free(). +** +** Otherwise, if no error occurs, this function returns SQLITE_OK if the +** update has been partially applied, or SQLITE_DONE if it has been +** completely applied. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3rbu_close(sqlite3rbu *pRbu, char **pzErrmsg); +/* +** Return the total number of key-value operations (inserts, deletes or +** updates) that have been performed on the target database since the +** current RBU update was started. +*/ +SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3rbu_progress(sqlite3rbu *pRbu); /* -** Arguments aIdx, aDistance and aSpare all point to arrays of size -** nIdx. The aIdx array contains the set of integers from 0 to -** (nIdx-1) in no particular order. This function sorts the values -** in aIdx according to the indexed values in aDistance. For -** example, assuming the inputs: +** Create an RBU VFS named zName that accesses the underlying file-system +** via existing VFS zParent. Or, if the zParent parameter is passed NULL, +** then the new RBU VFS uses the default system VFS to access the file-system. +** The new object is registered as a non-default VFS with SQLite before +** returning. ** -** aIdx = { 0, 1, 2, 3 } -** aDistance = { 5.0, 2.0, 7.0, 6.0 } +** Part of the RBU implementation uses a custom VFS object. Usually, this +** object is created and deleted automatically by RBU. ** -** this function sets the aIdx array to contain: +** The exception is for applications that also use zipvfs. In this case, +** the custom VFS must be explicitly created by the user before the RBU +** handle is opened. The RBU VFS should be installed so that the zipvfs +** VFS uses the RBU VFS, which in turn uses any other VFS layers in use +** (for example multiplexor) to access the file-system. For example, +** to assemble an RBU enabled VFS stack that uses both zipvfs and +** multiplexor (error checking omitted): ** -** aIdx = { 0, 1, 2, 3 } +** // Create a VFS named "multiplex" (not the default). +** sqlite3_multiplex_initialize(0, 0); ** -** The aSpare array is used as temporary working space by the -** sorting algorithm. +** // Create an rbu VFS named "rbu" that uses multiplexor. If the +** // second argument were replaced with NULL, the "rbu" VFS would +** // access the file-system via the system default VFS, bypassing the +** // multiplexor. +** sqlite3rbu_create_vfs("rbu", "multiplex"); +** +** // Create a zipvfs VFS named "zipvfs" that uses rbu. +** zipvfs_create_vfs_v3("zipvfs", "rbu", 0, xCompressorAlgorithmDetector); +** +** // Make zipvfs the default VFS. +** sqlite3_vfs_register(sqlite3_vfs_find("zipvfs"), 1); +** +** Because the default VFS created above includes a RBU functionality, it +** may be used by RBU clients. Attempting to use RBU with a zipvfs VFS stack +** that does not include the RBU layer results in an error. +** +** The overhead of adding the "rbu" VFS to the system is negligible for +** non-RBU users. There is no harm in an application accessing the +** file-system via "rbu" all the time, even if it only uses RBU functionality +** occasionally. */ -static void SortByDistance( - int *aIdx, - int nIdx, - RtreeDValue *aDistance, - int *aSpare -){ - if( nIdx>1 ){ - int iLeft = 0; - int iRight = 0; +SQLITE_API int SQLITE_STDCALL sqlite3rbu_create_vfs(const char *zName, const char *zParent); - int nLeft = nIdx/2; - int nRight = nIdx-nLeft; - int *aLeft = aIdx; - int *aRight = &aIdx[nLeft]; +/* +** Deregister and destroy an RBU vfs created by an earlier call to +** sqlite3rbu_create_vfs(). +** +** VFS objects are not reference counted. If a VFS object is destroyed +** before all database handles that use it have been closed, the results +** are undefined. +*/ +SQLITE_API void SQLITE_STDCALL sqlite3rbu_destroy_vfs(const char *zName); - SortByDistance(aLeft, nLeft, aDistance, aSpare); - SortByDistance(aRight, nRight, aDistance, aSpare); +#endif /* _SQLITE3RBU_H */ - memcpy(aSpare, aLeft, sizeof(int)*nLeft); - aLeft = aSpare; +/************** End of sqlite3rbu.h ******************************************/ +/************** Continuing where we left off in sqlite3rbu.c *****************/ - while( iLeft<nLeft || iRight<nRight ){ - if( iLeft==nLeft ){ - aIdx[iLeft+iRight] = aRight[iRight]; - iRight++; - }else if( iRight==nRight ){ - aIdx[iLeft+iRight] = aLeft[iLeft]; - iLeft++; - }else{ - RtreeDValue fLeft = aDistance[aLeft[iLeft]]; - RtreeDValue fRight = aDistance[aRight[iRight]]; - if( fLeft<fRight ){ - aIdx[iLeft+iRight] = aLeft[iLeft]; - iLeft++; - }else{ - aIdx[iLeft+iRight] = aRight[iRight]; - iRight++; - } - } - } +/* Maximum number of prepared UPDATE statements held by this module */ +#define SQLITE_RBU_UPDATE_CACHESIZE 16 -#if 0 - /* Check that the sort worked */ - { - int jj; - for(jj=1; jj<nIdx; jj++){ - RtreeDValue left = aDistance[aIdx[jj-1]]; - RtreeDValue right = aDistance[aIdx[jj]]; - assert( left<=right ); - } - } +/* +** Swap two objects of type TYPE. +*/ +#if !defined(SQLITE_AMALGAMATION) +# define SWAP(TYPE,A,B) {TYPE t=A; A=B; B=t;} +#endif + +/* +** The rbu_state table is used to save the state of a partially applied +** update so that it can be resumed later. The table consists of integer +** keys mapped to values as follows: +** +** RBU_STATE_STAGE: +** May be set to integer values 1, 2, 4 or 5. As follows: +** 1: the *-rbu file is currently under construction. +** 2: the *-rbu file has been constructed, but not yet moved +** to the *-wal path. +** 4: the checkpoint is underway. +** 5: the rbu update has been checkpointed. +** +** RBU_STATE_TBL: +** Only valid if STAGE==1. The target database name of the table +** currently being written. +** +** RBU_STATE_IDX: +** Only valid if STAGE==1. The target database name of the index +** currently being written, or NULL if the main table is currently being +** updated. +** +** RBU_STATE_ROW: +** Only valid if STAGE==1. Number of rows already processed for the current +** table/index. +** +** RBU_STATE_PROGRESS: +** Trbul number of sqlite3rbu_step() calls made so far as part of this +** rbu update. +** +** RBU_STATE_CKPT: +** Valid if STAGE==4. The 64-bit checksum associated with the wal-index +** header created by recovering the *-wal file. This is used to detect +** cases when another client appends frames to the *-wal file in the +** middle of an incremental checkpoint (an incremental checkpoint cannot +** be continued if this happens). +** +** RBU_STATE_COOKIE: +** Valid if STAGE==1. The current change-counter cookie value in the +** target db file. +** +** RBU_STATE_OALSZ: +** Valid if STAGE==1. The size in bytes of the *-oal file. +*/ +#define RBU_STATE_STAGE 1 +#define RBU_STATE_TBL 2 +#define RBU_STATE_IDX 3 +#define RBU_STATE_ROW 4 +#define RBU_STATE_PROGRESS 5 +#define RBU_STATE_CKPT 6 +#define RBU_STATE_COOKIE 7 +#define RBU_STATE_OALSZ 8 + +#define RBU_STAGE_OAL 1 +#define RBU_STAGE_MOVE 2 +#define RBU_STAGE_CAPTURE 3 +#define RBU_STAGE_CKPT 4 +#define RBU_STAGE_DONE 5 + + +#define RBU_CREATE_STATE \ + "CREATE TABLE IF NOT EXISTS %s.rbu_state(k INTEGER PRIMARY KEY, v)" + +typedef struct RbuFrame RbuFrame; +typedef struct RbuObjIter RbuObjIter; +typedef struct RbuState RbuState; +typedef struct rbu_vfs rbu_vfs; +typedef struct rbu_file rbu_file; +typedef struct RbuUpdateStmt RbuUpdateStmt; + +#if !defined(SQLITE_AMALGAMATION) +typedef unsigned int u32; +typedef unsigned char u8; +typedef sqlite3_int64 i64; #endif - } -} /* -** Arguments aIdx, aCell and aSpare all point to arrays of size -** nIdx. The aIdx array contains the set of integers from 0 to -** (nIdx-1) in no particular order. This function sorts the values -** in aIdx according to dimension iDim of the cells in aCell. The -** minimum value of dimension iDim is considered first, the -** maximum used to break ties. -** -** The aSpare array is used as temporary working space by the -** sorting algorithm. +** These values must match the values defined in wal.c for the equivalent +** locks. These are not magic numbers as they are part of the SQLite file +** format. */ -static void SortByDimension( - Rtree *pRtree, - int *aIdx, - int nIdx, - int iDim, - RtreeCell *aCell, - int *aSpare -){ - if( nIdx>1 ){ +#define WAL_LOCK_WRITE 0 +#define WAL_LOCK_CKPT 1 +#define WAL_LOCK_READ0 3 - int iLeft = 0; - int iRight = 0; +/* +** A structure to store values read from the rbu_state table in memory. +*/ +struct RbuState { + int eStage; + char *zTbl; + char *zIdx; + i64 iWalCksum; + int nRow; + i64 nProgress; + u32 iCookie; + i64 iOalSz; +}; - int nLeft = nIdx/2; - int nRight = nIdx-nLeft; - int *aLeft = aIdx; - int *aRight = &aIdx[nLeft]; +struct RbuUpdateStmt { + char *zMask; /* Copy of update mask used with pUpdate */ + sqlite3_stmt *pUpdate; /* Last update statement (or NULL) */ + RbuUpdateStmt *pNext; +}; - SortByDimension(pRtree, aLeft, nLeft, iDim, aCell, aSpare); - SortByDimension(pRtree, aRight, nRight, iDim, aCell, aSpare); +/* +** An iterator of this type is used to iterate through all objects in +** the target database that require updating. For each such table, the +** iterator visits, in order: +** +** * the table itself, +** * each index of the table (zero or more points to visit), and +** * a special "cleanup table" state. +** +** abIndexed: +** If the table has no indexes on it, abIndexed is set to NULL. Otherwise, +** it points to an array of flags nTblCol elements in size. The flag is +** set for each column that is either a part of the PK or a part of an +** index. Or clear otherwise. +** +*/ +struct RbuObjIter { + sqlite3_stmt *pTblIter; /* Iterate through tables */ + sqlite3_stmt *pIdxIter; /* Index iterator */ + int nTblCol; /* Size of azTblCol[] array */ + char **azTblCol; /* Array of unquoted target column names */ + char **azTblType; /* Array of target column types */ + int *aiSrcOrder; /* src table col -> target table col */ + u8 *abTblPk; /* Array of flags, set on target PK columns */ + u8 *abNotNull; /* Array of flags, set on NOT NULL columns */ + u8 *abIndexed; /* Array of flags, set on indexed & PK cols */ + int eType; /* Table type - an RBU_PK_XXX value */ + + /* Output variables. zTbl==0 implies EOF. */ + int bCleanup; /* True in "cleanup" state */ + const char *zTbl; /* Name of target db table */ + const char *zIdx; /* Name of target db index (or null) */ + int iTnum; /* Root page of current object */ + int iPkTnum; /* If eType==EXTERNAL, root of PK index */ + int bUnique; /* Current index is unique */ + + /* Statements created by rbuObjIterPrepareAll() */ + int nCol; /* Number of columns in current object */ + sqlite3_stmt *pSelect; /* Source data */ + sqlite3_stmt *pInsert; /* Statement for INSERT operations */ + sqlite3_stmt *pDelete; /* Statement for DELETE ops */ + sqlite3_stmt *pTmpInsert; /* Insert into rbu_tmp_$zTbl */ + + /* Last UPDATE used (for PK b-tree updates only), or NULL. */ + RbuUpdateStmt *pRbuUpdate; +}; - memcpy(aSpare, aLeft, sizeof(int)*nLeft); - aLeft = aSpare; - while( iLeft<nLeft || iRight<nRight ){ - RtreeDValue xleft1 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2]); - RtreeDValue xleft2 = DCOORD(aCell[aLeft[iLeft]].aCoord[iDim*2+1]); - RtreeDValue xright1 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2]); - RtreeDValue xright2 = DCOORD(aCell[aRight[iRight]].aCoord[iDim*2+1]); - if( (iLeft!=nLeft) && ((iRight==nRight) - || (xleft1<xright1) - || (xleft1==xright1 && xleft2<xright2) - )){ - aIdx[iLeft+iRight] = aLeft[iLeft]; - iLeft++; - }else{ - aIdx[iLeft+iRight] = aRight[iRight]; - iRight++; - } - } +/* +** Values for RbuObjIter.eType +** +** 0: Table does not exist (error) +** 1: Table has an implicit rowid. +** 2: Table has an explicit IPK column. +** 3: Table has an external PK index. +** 4: Table is WITHOUT ROWID. +** 5: Table is a virtual table. +*/ +#define RBU_PK_NOTABLE 0 +#define RBU_PK_NONE 1 +#define RBU_PK_IPK 2 +#define RBU_PK_EXTERNAL 3 +#define RBU_PK_WITHOUT_ROWID 4 +#define RBU_PK_VTAB 5 -#if 0 - /* Check that the sort worked */ - { - int jj; - for(jj=1; jj<nIdx; jj++){ - RtreeDValue xleft1 = aCell[aIdx[jj-1]].aCoord[iDim*2]; - RtreeDValue xleft2 = aCell[aIdx[jj-1]].aCoord[iDim*2+1]; - RtreeDValue xright1 = aCell[aIdx[jj]].aCoord[iDim*2]; - RtreeDValue xright2 = aCell[aIdx[jj]].aCoord[iDim*2+1]; - assert( xleft1<=xright1 && (xleft1<xright1 || xleft2<=xright2) ); - } - } -#endif - } -} /* -** Implementation of the R*-tree variant of SplitNode from Beckman[1990]. +** Within the RBU_STAGE_OAL stage, each call to sqlite3rbu_step() performs +** one of the following operations. */ -static int splitNodeStartree( - Rtree *pRtree, - RtreeCell *aCell, - int nCell, - RtreeNode *pLeft, - RtreeNode *pRight, - RtreeCell *pBboxLeft, - RtreeCell *pBboxRight -){ - int **aaSorted; - int *aSpare; - int ii; +#define RBU_INSERT 1 /* Insert on a main table b-tree */ +#define RBU_DELETE 2 /* Delete a row from a main table b-tree */ +#define RBU_IDX_DELETE 3 /* Delete a row from an aux. index b-tree */ +#define RBU_IDX_INSERT 4 /* Insert on an aux. index b-tree */ +#define RBU_UPDATE 5 /* Update a row in a main table b-tree */ - int iBestDim = 0; - int iBestSplit = 0; - RtreeDValue fBestMargin = RTREE_ZERO; - int nByte = (pRtree->nDim+1)*(sizeof(int*)+nCell*sizeof(int)); +/* +** A single step of an incremental checkpoint - frame iWalFrame of the wal +** file should be copied to page iDbPage of the database file. +*/ +struct RbuFrame { + u32 iDbPage; + u32 iWalFrame; +}; - aaSorted = (int **)sqlite3_malloc(nByte); - if( !aaSorted ){ - return SQLITE_NOMEM; - } +/* +** RBU handle. +*/ +struct sqlite3rbu { + int eStage; /* Value of RBU_STATE_STAGE field */ + sqlite3 *dbMain; /* target database handle */ + sqlite3 *dbRbu; /* rbu database handle */ + char *zTarget; /* Path to target db */ + char *zRbu; /* Path to rbu db */ + char *zState; /* Path to state db (or NULL if zRbu) */ + char zStateDb[5]; /* Db name for state ("stat" or "main") */ + int rc; /* Value returned by last rbu_step() call */ + char *zErrmsg; /* Error message if rc!=SQLITE_OK */ + int nStep; /* Rows processed for current object */ + int nProgress; /* Rows processed for all objects */ + RbuObjIter objiter; /* Iterator for skipping through tbl/idx */ + const char *zVfsName; /* Name of automatically created rbu vfs */ + rbu_file *pTargetFd; /* File handle open on target db */ + i64 iOalSz; + + /* The following state variables are used as part of the incremental + ** checkpoint stage (eStage==RBU_STAGE_CKPT). See comments surrounding + ** function rbuSetupCheckpoint() for details. */ + u32 iMaxFrame; /* Largest iWalFrame value in aFrame[] */ + u32 mLock; + int nFrame; /* Entries in aFrame[] array */ + int nFrameAlloc; /* Allocated size of aFrame[] array */ + RbuFrame *aFrame; + int pgsz; + u8 *aBuf; + i64 iWalCksum; +}; - aSpare = &((int *)&aaSorted[pRtree->nDim])[pRtree->nDim*nCell]; - memset(aaSorted, 0, nByte); - for(ii=0; ii<pRtree->nDim; ii++){ - int jj; - aaSorted[ii] = &((int *)&aaSorted[pRtree->nDim])[ii*nCell]; - for(jj=0; jj<nCell; jj++){ - aaSorted[ii][jj] = jj; - } - SortByDimension(pRtree, aaSorted[ii], nCell, ii, aCell, aSpare); - } +/* +** An rbu VFS is implemented using an instance of this structure. +*/ +struct rbu_vfs { + sqlite3_vfs base; /* rbu VFS shim methods */ + sqlite3_vfs *pRealVfs; /* Underlying VFS */ + sqlite3_mutex *mutex; /* Mutex to protect pMain */ + rbu_file *pMain; /* Linked list of main db files */ +}; - for(ii=0; ii<pRtree->nDim; ii++){ - RtreeDValue margin = RTREE_ZERO; - RtreeDValue fBestOverlap = RTREE_ZERO; - RtreeDValue fBestArea = RTREE_ZERO; - int iBestLeft = 0; - int nLeft; +/* +** Each file opened by an rbu VFS is represented by an instance of +** the following structure. +*/ +struct rbu_file { + sqlite3_file base; /* sqlite3_file methods */ + sqlite3_file *pReal; /* Underlying file handle */ + rbu_vfs *pRbuVfs; /* Pointer to the rbu_vfs object */ + sqlite3rbu *pRbu; /* Pointer to rbu object (rbu target only) */ - for( - nLeft=RTREE_MINCELLS(pRtree); - nLeft<=(nCell-RTREE_MINCELLS(pRtree)); - nLeft++ - ){ - RtreeCell left; - RtreeCell right; - int kk; - RtreeDValue overlap; - RtreeDValue area; + int openFlags; /* Flags this file was opened with */ + u32 iCookie; /* Cookie value for main db files */ + u8 iWriteVer; /* "write-version" value for main db files */ - memcpy(&left, &aCell[aaSorted[ii][0]], sizeof(RtreeCell)); - memcpy(&right, &aCell[aaSorted[ii][nCell-1]], sizeof(RtreeCell)); - for(kk=1; kk<(nCell-1); kk++){ - if( kk<nLeft ){ - cellUnion(pRtree, &left, &aCell[aaSorted[ii][kk]]); - }else{ - cellUnion(pRtree, &right, &aCell[aaSorted[ii][kk]]); - } - } - margin += cellMargin(pRtree, &left); - margin += cellMargin(pRtree, &right); - overlap = cellOverlap(pRtree, &left, &right, 1); - area = cellArea(pRtree, &left) + cellArea(pRtree, &right); - if( (nLeft==RTREE_MINCELLS(pRtree)) - || (overlap<fBestOverlap) - || (overlap==fBestOverlap && area<fBestArea) - ){ - iBestLeft = nLeft; - fBestOverlap = overlap; - fBestArea = area; - } - } + int nShm; /* Number of entries in apShm[] array */ + char **apShm; /* Array of mmap'd *-shm regions */ + char *zDel; /* Delete this when closing file */ - if( ii==0 || margin<fBestMargin ){ - iBestDim = ii; - fBestMargin = margin; - iBestSplit = iBestLeft; - } - } + const char *zWal; /* Wal filename for this main db file */ + rbu_file *pWalFd; /* Wal file descriptor for this main db */ + rbu_file *pMainNext; /* Next MAIN_DB file */ +}; - memcpy(pBboxLeft, &aCell[aaSorted[iBestDim][0]], sizeof(RtreeCell)); - memcpy(pBboxRight, &aCell[aaSorted[iBestDim][iBestSplit]], sizeof(RtreeCell)); - for(ii=0; ii<nCell; ii++){ - RtreeNode *pTarget = (ii<iBestSplit)?pLeft:pRight; - RtreeCell *pBbox = (ii<iBestSplit)?pBboxLeft:pBboxRight; - RtreeCell *pCell = &aCell[aaSorted[iBestDim][ii]]; - nodeInsertCell(pRtree, pTarget, pCell); - cellUnion(pRtree, pBbox, pCell); - } - sqlite3_free(aaSorted); - return SQLITE_OK; +/* +** Prepare the SQL statement in buffer zSql against database handle db. +** If successful, set *ppStmt to point to the new statement and return +** SQLITE_OK. +** +** Otherwise, if an error does occur, set *ppStmt to NULL and return +** an SQLite error code. Additionally, set output variable *pzErrmsg to +** point to a buffer containing an error message. It is the responsibility +** of the caller to (eventually) free this buffer using sqlite3_free(). +*/ +static int prepareAndCollectError( + sqlite3 *db, + sqlite3_stmt **ppStmt, + char **pzErrmsg, + const char *zSql +){ + int rc = sqlite3_prepare_v2(db, zSql, -1, ppStmt, 0); + if( rc!=SQLITE_OK ){ + *pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + *ppStmt = 0; + } + return rc; } +/* +** Reset the SQL statement passed as the first argument. Return a copy +** of the value returned by sqlite3_reset(). +** +** If an error has occurred, then set *pzErrmsg to point to a buffer +** containing an error message. It is the responsibility of the caller +** to eventually free this buffer using sqlite3_free(). +*/ +static int resetAndCollectError(sqlite3_stmt *pStmt, char **pzErrmsg){ + int rc = sqlite3_reset(pStmt); + if( rc!=SQLITE_OK ){ + *pzErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(sqlite3_db_handle(pStmt))); + } + return rc; +} -static int updateMapping( - Rtree *pRtree, - i64 iRowid, - RtreeNode *pNode, - int iHeight +/* +** Unless it is NULL, argument zSql points to a buffer allocated using +** sqlite3_malloc containing an SQL statement. This function prepares the SQL +** statement against database db and frees the buffer. If statement +** compilation is successful, *ppStmt is set to point to the new statement +** handle and SQLITE_OK is returned. +** +** Otherwise, if an error occurs, *ppStmt is set to NULL and an error code +** returned. In this case, *pzErrmsg may also be set to point to an error +** message. It is the responsibility of the caller to free this error message +** buffer using sqlite3_free(). +** +** If argument zSql is NULL, this function assumes that an OOM has occurred. +** In this case SQLITE_NOMEM is returned and *ppStmt set to NULL. +*/ +static int prepareFreeAndCollectError( + sqlite3 *db, + sqlite3_stmt **ppStmt, + char **pzErrmsg, + char *zSql ){ - int (*xSetMapping)(Rtree *, sqlite3_int64, sqlite3_int64); - xSetMapping = ((iHeight==0)?rowidWrite:parentWrite); - if( iHeight>0 ){ - RtreeNode *pChild = nodeHashLookup(pRtree, iRowid); - if( pChild ){ - nodeRelease(pRtree, pChild->pParent); - nodeReference(pNode); - pChild->pParent = pNode; - } + int rc; + assert( *pzErrmsg==0 ); + if( zSql==0 ){ + rc = SQLITE_NOMEM; + *ppStmt = 0; + }else{ + rc = prepareAndCollectError(db, ppStmt, pzErrmsg, zSql); + sqlite3_free(zSql); } - return xSetMapping(pRtree, iRowid, pNode->iNode); + return rc; } -static int SplitNode( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight -){ +/* +** Free the RbuObjIter.azTblCol[] and RbuObjIter.abTblPk[] arrays allocated +** by an earlier call to rbuObjIterCacheTableInfo(). +*/ +static void rbuObjIterFreeCols(RbuObjIter *pIter){ int i; - int newCellIsRight = 0; - - int rc = SQLITE_OK; - int nCell = NCELL(pNode); - RtreeCell *aCell; - int *aiUsed; + for(i=0; i<pIter->nTblCol; i++){ + sqlite3_free(pIter->azTblCol[i]); + sqlite3_free(pIter->azTblType[i]); + } + sqlite3_free(pIter->azTblCol); + pIter->azTblCol = 0; + pIter->azTblType = 0; + pIter->aiSrcOrder = 0; + pIter->abTblPk = 0; + pIter->abNotNull = 0; + pIter->nTblCol = 0; + pIter->eType = 0; /* Invalid value */ +} + +/* +** Finalize all statements and free all allocations that are specific to +** the current object (table/index pair). +*/ +static void rbuObjIterClearStatements(RbuObjIter *pIter){ + RbuUpdateStmt *pUp; + + sqlite3_finalize(pIter->pSelect); + sqlite3_finalize(pIter->pInsert); + sqlite3_finalize(pIter->pDelete); + sqlite3_finalize(pIter->pTmpInsert); + pUp = pIter->pRbuUpdate; + while( pUp ){ + RbuUpdateStmt *pTmp = pUp->pNext; + sqlite3_finalize(pUp->pUpdate); + sqlite3_free(pUp); + pUp = pTmp; + } + + pIter->pSelect = 0; + pIter->pInsert = 0; + pIter->pDelete = 0; + pIter->pRbuUpdate = 0; + pIter->pTmpInsert = 0; + pIter->nCol = 0; +} - RtreeNode *pLeft = 0; - RtreeNode *pRight = 0; +/* +** Clean up any resources allocated as part of the iterator object passed +** as the only argument. +*/ +static void rbuObjIterFinalize(RbuObjIter *pIter){ + rbuObjIterClearStatements(pIter); + sqlite3_finalize(pIter->pTblIter); + sqlite3_finalize(pIter->pIdxIter); + rbuObjIterFreeCols(pIter); + memset(pIter, 0, sizeof(RbuObjIter)); +} - RtreeCell leftbbox; - RtreeCell rightbbox; +/* +** Advance the iterator to the next position. +** +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the next entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. +*/ +static int rbuObjIterNext(sqlite3rbu *p, RbuObjIter *pIter){ + int rc = p->rc; + if( rc==SQLITE_OK ){ - /* Allocate an array and populate it with a copy of pCell and - ** all cells from node pLeft. Then zero the original node. - */ - aCell = sqlite3_malloc((sizeof(RtreeCell)+sizeof(int))*(nCell+1)); - if( !aCell ){ - rc = SQLITE_NOMEM; - goto splitnode_out; - } - aiUsed = (int *)&aCell[nCell+1]; - memset(aiUsed, 0, sizeof(int)*(nCell+1)); - for(i=0; i<nCell; i++){ - nodeGetCell(pRtree, pNode, i, &aCell[i]); - } - nodeZero(pRtree, pNode); - memcpy(&aCell[nCell], pCell, sizeof(RtreeCell)); - nCell++; + /* Free any SQLite statements used while processing the previous object */ + rbuObjIterClearStatements(pIter); + if( pIter->zIdx==0 ){ + rc = sqlite3_exec(p->dbMain, + "DROP TRIGGER IF EXISTS temp.rbu_insert_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update1_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_update2_tr;" + "DROP TRIGGER IF EXISTS temp.rbu_delete_tr;" + , 0, 0, &p->zErrmsg + ); + } - if( pNode->iNode==1 ){ - pRight = nodeNew(pRtree, pNode); - pLeft = nodeNew(pRtree, pNode); - pRtree->iDepth++; - pNode->isDirty = 1; - writeInt16(pNode->zData, pRtree->iDepth); - }else{ - pLeft = pNode; - pRight = nodeNew(pRtree, pLeft->pParent); - nodeReference(pLeft); + if( rc==SQLITE_OK ){ + if( pIter->bCleanup ){ + rbuObjIterFreeCols(pIter); + pIter->bCleanup = 0; + rc = sqlite3_step(pIter->pTblIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pTblIter, &p->zErrmsg); + pIter->zTbl = 0; + }else{ + pIter->zTbl = (const char*)sqlite3_column_text(pIter->pTblIter, 0); + rc = pIter->zTbl ? SQLITE_OK : SQLITE_NOMEM; + } + }else{ + if( pIter->zIdx==0 ){ + sqlite3_stmt *pIdx = pIter->pIdxIter; + rc = sqlite3_bind_text(pIdx, 1, pIter->zTbl, -1, SQLITE_STATIC); + } + if( rc==SQLITE_OK ){ + rc = sqlite3_step(pIter->pIdxIter); + if( rc!=SQLITE_ROW ){ + rc = resetAndCollectError(pIter->pIdxIter, &p->zErrmsg); + pIter->bCleanup = 1; + pIter->zIdx = 0; + }else{ + pIter->zIdx = (const char*)sqlite3_column_text(pIter->pIdxIter, 0); + pIter->iTnum = sqlite3_column_int(pIter->pIdxIter, 1); + pIter->bUnique = sqlite3_column_int(pIter->pIdxIter, 2); + rc = pIter->zIdx ? SQLITE_OK : SQLITE_NOMEM; + } + } + } + } } - if( !pLeft || !pRight ){ - rc = SQLITE_NOMEM; - goto splitnode_out; + if( rc!=SQLITE_OK ){ + rbuObjIterFinalize(pIter); + p->rc = rc; } + return rc; +} - memset(pLeft->zData, 0, pRtree->iNodeSize); - memset(pRight->zData, 0, pRtree->iNodeSize); +/* +** Initialize the iterator structure passed as the second argument. +** +** If no error occurs, SQLITE_OK is returned and the iterator is left +** pointing to the first entry. Otherwise, an error code and message is +** left in the RBU handle passed as the first argument. A copy of the +** error code is returned. +*/ +static int rbuObjIterFirst(sqlite3rbu *p, RbuObjIter *pIter){ + int rc; + memset(pIter, 0, sizeof(RbuObjIter)); - rc = splitNodeStartree(pRtree, aCell, nCell, pLeft, pRight, - &leftbbox, &rightbbox); - if( rc!=SQLITE_OK ){ - goto splitnode_out; - } + rc = prepareAndCollectError(p->dbRbu, &pIter->pTblIter, &p->zErrmsg, + "SELECT substr(name, 6) FROM sqlite_master " + "WHERE type IN ('table', 'view') AND name LIKE 'data_%'" + ); - /* Ensure both child nodes have node numbers assigned to them by calling - ** nodeWrite(). Node pRight always needs a node number, as it was created - ** by nodeNew() above. But node pLeft sometimes already has a node number. - ** In this case avoid the all to nodeWrite(). - */ - if( SQLITE_OK!=(rc = nodeWrite(pRtree, pRight)) - || (0==pLeft->iNode && SQLITE_OK!=(rc = nodeWrite(pRtree, pLeft))) - ){ - goto splitnode_out; + if( rc==SQLITE_OK ){ + rc = prepareAndCollectError(p->dbMain, &pIter->pIdxIter, &p->zErrmsg, + "SELECT name, rootpage, sql IS NULL OR substr(8, 6)=='UNIQUE' " + " FROM main.sqlite_master " + " WHERE type='index' AND tbl_name = ?" + ); } - rightbbox.iRowid = pRight->iNode; - leftbbox.iRowid = pLeft->iNode; + pIter->bCleanup = 1; + p->rc = rc; + return rbuObjIterNext(p, pIter); +} - if( pNode->iNode==1 ){ - rc = rtreeInsertCell(pRtree, pLeft->pParent, &leftbbox, iHeight+1); - if( rc!=SQLITE_OK ){ - goto splitnode_out; - } +/* +** This is a wrapper around "sqlite3_mprintf(zFmt, ...)". If an OOM occurs, +** an error code is stored in the RBU handle passed as the first argument. +** +** If an error has already occurred (p->rc is already set to something other +** than SQLITE_OK), then this function returns NULL without modifying the +** stored error code. In this case it still calls sqlite3_free() on any +** printf() parameters associated with %z conversions. +*/ +static char *rbuMPrintf(sqlite3rbu *p, const char *zFmt, ...){ + char *zSql = 0; + va_list ap; + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + if( p->rc==SQLITE_OK ){ + if( zSql==0 ) p->rc = SQLITE_NOMEM; }else{ - RtreeNode *pParent = pLeft->pParent; - int iCell; - rc = nodeParentIndex(pRtree, pLeft, &iCell); - if( rc==SQLITE_OK ){ - nodeOverwriteCell(pRtree, pParent, &leftbbox, iCell); - rc = AdjustTree(pRtree, pParent, &leftbbox); - } - if( rc!=SQLITE_OK ){ - goto splitnode_out; - } - } - if( (rc = rtreeInsertCell(pRtree, pRight->pParent, &rightbbox, iHeight+1)) ){ - goto splitnode_out; + sqlite3_free(zSql); + zSql = 0; } + va_end(ap); + return zSql; +} - for(i=0; i<NCELL(pRight); i++){ - i64 iRowid = nodeGetRowid(pRtree, pRight, i); - rc = updateMapping(pRtree, iRowid, pRight, iHeight); - if( iRowid==pCell->iRowid ){ - newCellIsRight = 1; - } - if( rc!=SQLITE_OK ){ - goto splitnode_out; +/* +** Argument zFmt is a sqlite3_mprintf() style format string. The trailing +** arguments are the usual subsitution values. This function performs +** the printf() style substitutions and executes the result as an SQL +** statement on the RBU handles database. +** +** If an error occurs, an error code and error message is stored in the +** RBU handle. If an error has already occurred when this function is +** called, it is a no-op. +*/ +static int rbuMPrintfExec(sqlite3rbu *p, sqlite3 *db, const char *zFmt, ...){ + va_list ap; + char *zSql; + va_start(ap, zFmt); + zSql = sqlite3_vmprintf(zFmt, ap); + if( p->rc==SQLITE_OK ){ + if( zSql==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + p->rc = sqlite3_exec(db, zSql, 0, 0, &p->zErrmsg); } } - if( pNode->iNode==1 ){ - for(i=0; i<NCELL(pLeft); i++){ - i64 iRowid = nodeGetRowid(pRtree, pLeft, i); - rc = updateMapping(pRtree, iRowid, pLeft, iHeight); - if( rc!=SQLITE_OK ){ - goto splitnode_out; - } + sqlite3_free(zSql); + va_end(ap); + return p->rc; +} + +/* +** Attempt to allocate and return a pointer to a zeroed block of nByte +** bytes. +** +** If an error (i.e. an OOM condition) occurs, return NULL and leave an +** error code in the rbu handle passed as the first argument. Or, if an +** error has already occurred when this function is called, return NULL +** immediately without attempting the allocation or modifying the stored +** error code. +*/ +static void *rbuMalloc(sqlite3rbu *p, int nByte){ + void *pRet = 0; + if( p->rc==SQLITE_OK ){ + assert( nByte>0 ); + pRet = sqlite3_malloc(nByte); + if( pRet==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, nByte); } - }else if( newCellIsRight==0 ){ - rc = updateMapping(pRtree, pCell->iRowid, pLeft, iHeight); } + return pRet; +} - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pRight); - pRight = 0; - } - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pLeft); - pLeft = 0; - } -splitnode_out: - nodeRelease(pRtree, pRight); - nodeRelease(pRtree, pLeft); - sqlite3_free(aCell); - return rc; +/* +** Allocate and zero the pIter->azTblCol[] and abTblPk[] arrays so that +** there is room for at least nCol elements. If an OOM occurs, store an +** error code in the RBU handle passed as the first argument. +*/ +static void rbuAllocateIterArrays(sqlite3rbu *p, RbuObjIter *pIter, int nCol){ + int nByte = (2*sizeof(char*) + sizeof(int) + 3*sizeof(u8)) * nCol; + char **azNew; + + azNew = (char**)rbuMalloc(p, nByte); + if( azNew ){ + pIter->azTblCol = azNew; + pIter->azTblType = &azNew[nCol]; + pIter->aiSrcOrder = (int*)&pIter->azTblType[nCol]; + pIter->abTblPk = (u8*)&pIter->aiSrcOrder[nCol]; + pIter->abNotNull = (u8*)&pIter->abTblPk[nCol]; + pIter->abIndexed = (u8*)&pIter->abNotNull[nCol]; + } } /* -** If node pLeaf is not the root of the r-tree and its pParent pointer is -** still NULL, load all ancestor nodes of pLeaf into memory and populate -** the pLeaf->pParent chain all the way up to the root node. +** The first argument must be a nul-terminated string. This function +** returns a copy of the string in memory obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free this memory +** using sqlite3_free(). ** -** This operation is required when a row is deleted (or updated - an update -** is implemented as a delete followed by an insert). SQLite provides the -** rowid of the row to delete, which can be used to find the leaf on which -** the entry resides (argument pLeaf). Once the leaf is located, this -** function is called to determine its ancestry. +** If an OOM condition is encountered when attempting to allocate memory, +** output variable (*pRc) is set to SQLITE_NOMEM before returning. Otherwise, +** if the allocation succeeds, (*pRc) is left unchanged. */ -static int fixLeafParent(Rtree *pRtree, RtreeNode *pLeaf){ - int rc = SQLITE_OK; - RtreeNode *pChild = pLeaf; - while( rc==SQLITE_OK && pChild->iNode!=1 && pChild->pParent==0 ){ - int rc2 = SQLITE_OK; /* sqlite3_reset() return code */ - sqlite3_bind_int64(pRtree->pReadParent, 1, pChild->iNode); - rc = sqlite3_step(pRtree->pReadParent); - if( rc==SQLITE_ROW ){ - RtreeNode *pTest; /* Used to test for reference loops */ - i64 iNode; /* Node number of parent node */ +static char *rbuStrndup(const char *zStr, int *pRc){ + char *zRet = 0; - /* Before setting pChild->pParent, test that we are not creating a - ** loop of references (as we would if, say, pChild==pParent). We don't - ** want to do this as it leads to a memory leak when trying to delete - ** the referenced counted node structures. - */ - iNode = sqlite3_column_int64(pRtree->pReadParent, 0); - for(pTest=pLeaf; pTest && pTest->iNode!=iNode; pTest=pTest->pParent); - if( !pTest ){ - rc2 = nodeAcquire(pRtree, iNode, 0, &pChild->pParent); - } + assert( *pRc==SQLITE_OK ); + if( zStr ){ + int nCopy = strlen(zStr) + 1; + zRet = (char*)sqlite3_malloc(nCopy); + if( zRet ){ + memcpy(zRet, zStr, nCopy); + }else{ + *pRc = SQLITE_NOMEM; } - rc = sqlite3_reset(pRtree->pReadParent); - if( rc==SQLITE_OK ) rc = rc2; - if( rc==SQLITE_OK && !pChild->pParent ) rc = SQLITE_CORRUPT_VTAB; - pChild = pChild->pParent; } - return rc; -} - -static int deleteCell(Rtree *, RtreeNode *, int, int); - -static int removeNode(Rtree *pRtree, RtreeNode *pNode, int iHeight){ - int rc; - int rc2; - RtreeNode *pParent = 0; - int iCell; - assert( pNode->nRef==1 ); + return zRet; +} - /* Remove the entry in the parent cell. */ - rc = nodeParentIndex(pRtree, pNode, &iCell); - if( rc==SQLITE_OK ){ - pParent = pNode->pParent; - pNode->pParent = 0; - rc = deleteCell(pRtree, pParent, iCell, iHeight+1); - } - rc2 = nodeRelease(pRtree, pParent); - if( rc==SQLITE_OK ){ - rc = rc2; +/* +** Finalize the statement passed as the second argument. +** +** If the sqlite3_finalize() call indicates that an error occurs, and the +** rbu handle error code is not already set, set the error code and error +** message accordingly. +*/ +static void rbuFinalize(sqlite3rbu *p, sqlite3_stmt *pStmt){ + sqlite3 *db = sqlite3_db_handle(pStmt); + int rc = sqlite3_finalize(pStmt); + if( p->rc==SQLITE_OK && rc!=SQLITE_OK ){ + p->rc = rc; + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + } +} + +/* Determine the type of a table. +** +** peType is of type (int*), a pointer to an output parameter of type +** (int). This call sets the output parameter as follows, depending +** on the type of the table specified by parameters dbName and zTbl. +** +** RBU_PK_NOTABLE: No such table. +** RBU_PK_NONE: Table has an implicit rowid. +** RBU_PK_IPK: Table has an explicit IPK column. +** RBU_PK_EXTERNAL: Table has an external PK index. +** RBU_PK_WITHOUT_ROWID: Table is WITHOUT ROWID. +** RBU_PK_VTAB: Table is a virtual table. +** +** Argument *piPk is also of type (int*), and also points to an output +** parameter. Unless the table has an external primary key index +** (i.e. unless *peType is set to 3), then *piPk is set to zero. Or, +** if the table does have an external primary key index, then *piPk +** is set to the root page number of the primary key index before +** returning. +** +** ALGORITHM: +** +** if( no entry exists in sqlite_master ){ +** return RBU_PK_NOTABLE +** }else if( sql for the entry starts with "CREATE VIRTUAL" ){ +** return RBU_PK_VTAB +** }else if( "PRAGMA index_list()" for the table contains a "pk" index ){ +** if( the index that is the pk exists in sqlite_master ){ +** *piPK = rootpage of that index. +** return RBU_PK_EXTERNAL +** }else{ +** return RBU_PK_WITHOUT_ROWID +** } +** }else if( "PRAGMA table_info()" lists one or more "pk" columns ){ +** return RBU_PK_IPK +** }else{ +** return RBU_PK_NONE +** } +*/ +static void rbuTableType( + sqlite3rbu *p, + const char *zTab, + int *peType, + int *piTnum, + int *piPk +){ + /* + ** 0) SELECT count(*) FROM sqlite_master where name=%Q AND IsVirtual(%Q) + ** 1) PRAGMA index_list = ? + ** 2) SELECT count(*) FROM sqlite_master where name=%Q + ** 3) PRAGMA table_info = ? + */ + sqlite3_stmt *aStmt[4] = {0, 0, 0, 0}; + + *peType = RBU_PK_NOTABLE; + *piPk = 0; + + assert( p->rc==SQLITE_OK ); + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[0], &p->zErrmsg, + sqlite3_mprintf( + "SELECT (sql LIKE 'create virtual%%'), rootpage" + " FROM sqlite_master" + " WHERE name=%Q", zTab + )); + if( p->rc!=SQLITE_OK || sqlite3_step(aStmt[0])!=SQLITE_ROW ){ + /* Either an error, or no such table. */ + goto rbuTableType_end; } - if( rc!=SQLITE_OK ){ - return rc; + if( sqlite3_column_int(aStmt[0], 0) ){ + *peType = RBU_PK_VTAB; /* virtual table */ + goto rbuTableType_end; } + *piTnum = sqlite3_column_int(aStmt[0], 1); - /* Remove the xxx_node entry. */ - sqlite3_bind_int64(pRtree->pDeleteNode, 1, pNode->iNode); - sqlite3_step(pRtree->pDeleteNode); - if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteNode)) ){ - return rc; + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[1], &p->zErrmsg, + sqlite3_mprintf("PRAGMA index_list=%Q",zTab) + ); + if( p->rc ) goto rbuTableType_end; + while( sqlite3_step(aStmt[1])==SQLITE_ROW ){ + const u8 *zOrig = sqlite3_column_text(aStmt[1], 3); + const u8 *zIdx = sqlite3_column_text(aStmt[1], 1); + if( zOrig && zIdx && zOrig[0]=='p' ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[2], &p->zErrmsg, + sqlite3_mprintf( + "SELECT rootpage FROM sqlite_master WHERE name = %Q", zIdx + )); + if( p->rc==SQLITE_OK ){ + if( sqlite3_step(aStmt[2])==SQLITE_ROW ){ + *piPk = sqlite3_column_int(aStmt[2], 0); + *peType = RBU_PK_EXTERNAL; + }else{ + *peType = RBU_PK_WITHOUT_ROWID; + } + } + goto rbuTableType_end; + } } - /* Remove the xxx_parent entry. */ - sqlite3_bind_int64(pRtree->pDeleteParent, 1, pNode->iNode); - sqlite3_step(pRtree->pDeleteParent); - if( SQLITE_OK!=(rc = sqlite3_reset(pRtree->pDeleteParent)) ){ - return rc; + p->rc = prepareFreeAndCollectError(p->dbMain, &aStmt[3], &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info=%Q",zTab) + ); + if( p->rc==SQLITE_OK ){ + while( sqlite3_step(aStmt[3])==SQLITE_ROW ){ + if( sqlite3_column_int(aStmt[3],5)>0 ){ + *peType = RBU_PK_IPK; /* explicit IPK column */ + goto rbuTableType_end; + } + } + *peType = RBU_PK_NONE; } - - /* Remove the node from the in-memory hash table and link it into - ** the Rtree.pDeleted list. Its contents will be re-inserted later on. - */ - nodeHashDelete(pRtree, pNode); - pNode->iNode = iHeight; - pNode->pNext = pRtree->pDeleted; - pNode->nRef++; - pRtree->pDeleted = pNode; - return SQLITE_OK; +rbuTableType_end: { + int i; + for(i=0; i<sizeof(aStmt)/sizeof(aStmt[0]); i++){ + rbuFinalize(p, aStmt[i]); + } + } } -static int fixBoundingBox(Rtree *pRtree, RtreeNode *pNode){ - RtreeNode *pParent = pNode->pParent; - int rc = SQLITE_OK; - if( pParent ){ - int ii; - int nCell = NCELL(pNode); - RtreeCell box; /* Bounding box for pNode */ - nodeGetCell(pRtree, pNode, 0, &box); - for(ii=1; ii<nCell; ii++){ - RtreeCell cell; - nodeGetCell(pRtree, pNode, ii, &cell); - cellUnion(pRtree, &box, &cell); - } - box.iRowid = pNode->iNode; - rc = nodeParentIndex(pRtree, pNode, &ii); - if( rc==SQLITE_OK ){ - nodeOverwriteCell(pRtree, pParent, &box, ii); - rc = fixBoundingBox(pRtree, pParent); +/* +** This is a helper function for rbuObjIterCacheTableInfo(). It populates +** the pIter->abIndexed[] array. +*/ +static void rbuObjIterCacheIndexedCols(sqlite3rbu *p, RbuObjIter *pIter){ + sqlite3_stmt *pList = 0; + int bIndex = 0; + + if( p->rc==SQLITE_OK ){ + memcpy(pIter->abIndexed, pIter->abTblPk, sizeof(u8)*pIter->nTblCol); + p->rc = prepareFreeAndCollectError(p->dbMain, &pList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + } + + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pList) ){ + const char *zIdx = (const char*)sqlite3_column_text(pList, 1); + sqlite3_stmt *pXInfo = 0; + if( zIdx==0 ) break; + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + if( iCid>=0 ) pIter->abIndexed[iCid] = 1; } + rbuFinalize(p, pXInfo); + bIndex = 1; } - return rc; + + rbuFinalize(p, pList); + if( bIndex==0 ) pIter->abIndexed = 0; } + /* -** Delete the cell at index iCell of node pNode. After removing the -** cell, adjust the r-tree data structure if required. +** If they are not already populated, populate the pIter->azTblCol[], +** pIter->abTblPk[], pIter->nTblCol and pIter->bRowid variables according to +** the table (not index) that the iterator currently points to. +** +** Return SQLITE_OK if successful, or an SQLite error code otherwise. If +** an error does occur, an error code and error message are also left in +** the RBU handle. */ -static int deleteCell(Rtree *pRtree, RtreeNode *pNode, int iCell, int iHeight){ - RtreeNode *pParent; - int rc; +static int rbuObjIterCacheTableInfo(sqlite3rbu *p, RbuObjIter *pIter){ + if( pIter->azTblCol==0 ){ + sqlite3_stmt *pStmt = 0; + int nCol = 0; + int i; /* for() loop iterator variable */ + int bRbuRowid = 0; /* If input table has column "rbu_rowid" */ + int iOrder = 0; + int iTnum = 0; + + /* Figure out the type of table this step will deal with. */ + assert( pIter->eType==0 ); + rbuTableType(p, pIter->zTbl, &pIter->eType, &iTnum, &pIter->iPkTnum); + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_NOTABLE ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("no such table: %s", pIter->zTbl); + } + if( p->rc ) return p->rc; + if( pIter->zIdx==0 ) pIter->iTnum = iTnum; + + assert( pIter->eType==RBU_PK_NONE || pIter->eType==RBU_PK_IPK + || pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_WITHOUT_ROWID + || pIter->eType==RBU_PK_VTAB + ); - if( SQLITE_OK!=(rc = fixLeafParent(pRtree, pNode)) ){ - return rc; - } + /* Populate the azTblCol[] and nTblCol variables based on the columns + ** of the input table. Ignore any input table columns that begin with + ** "rbu_". */ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pStmt, &p->zErrmsg, + sqlite3_mprintf("SELECT * FROM 'data_%q'", pIter->zTbl) + ); + if( p->rc==SQLITE_OK ){ + nCol = sqlite3_column_count(pStmt); + rbuAllocateIterArrays(p, pIter, nCol); + } + for(i=0; p->rc==SQLITE_OK && i<nCol; i++){ + const char *zName = (const char*)sqlite3_column_name(pStmt, i); + if( sqlite3_strnicmp("rbu_", zName, 4) ){ + char *zCopy = rbuStrndup(zName, &p->rc); + pIter->aiSrcOrder[pIter->nTblCol] = pIter->nTblCol; + pIter->azTblCol[pIter->nTblCol++] = zCopy; + } + else if( 0==sqlite3_stricmp("rbu_rowid", zName) ){ + bRbuRowid = 1; + } + } + sqlite3_finalize(pStmt); + pStmt = 0; - /* Remove the cell from the node. This call just moves bytes around - ** the in-memory node image, so it cannot fail. - */ - nodeDeleteCell(pRtree, pNode, iCell); + if( p->rc==SQLITE_OK + && bRbuRowid!=(pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE) + ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf( + "table data_%q %s rbu_rowid column", pIter->zTbl, + (bRbuRowid ? "may not have" : "requires") + ); + } - /* If the node is not the tree root and now has less than the minimum - ** number of cells, remove it from the tree. Otherwise, update the - ** cell in the parent node so that it tightly contains the updated - ** node. - */ - pParent = pNode->pParent; - assert( pParent || pNode->iNode==1 ); - if( pParent ){ - if( NCELL(pNode)<RTREE_MINCELLS(pRtree) ){ - rc = removeNode(pRtree, pNode, iHeight); - }else{ - rc = fixBoundingBox(pRtree, pNode); + /* Check that all non-HIDDEN columns in the destination table are also + ** present in the input table. Populate the abTblPk[], azTblType[] and + ** aiTblOrder[] arrays at the same time. */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pStmt, &p->zErrmsg, + sqlite3_mprintf("PRAGMA table_info(%Q)", pIter->zTbl) + ); + } + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + const char *zName = (const char*)sqlite3_column_text(pStmt, 1); + if( zName==0 ) break; /* An OOM - finalize() below returns S_NOMEM */ + for(i=iOrder; i<pIter->nTblCol; i++){ + if( 0==strcmp(zName, pIter->azTblCol[i]) ) break; + } + if( i==pIter->nTblCol ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("column missing from data_%q: %s", + pIter->zTbl, zName + ); + }else{ + int iPk = sqlite3_column_int(pStmt, 5); + int bNotNull = sqlite3_column_int(pStmt, 3); + const char *zType = (const char*)sqlite3_column_text(pStmt, 2); + + if( i!=iOrder ){ + SWAP(int, pIter->aiSrcOrder[i], pIter->aiSrcOrder[iOrder]); + SWAP(char*, pIter->azTblCol[i], pIter->azTblCol[iOrder]); + } + + pIter->azTblType[iOrder] = rbuStrndup(zType, &p->rc); + pIter->abTblPk[iOrder] = (iPk!=0); + pIter->abNotNull[iOrder] = (u8)bNotNull || (iPk!=0); + iOrder++; + } } + + rbuFinalize(p, pStmt); + rbuObjIterCacheIndexedCols(p, pIter); + assert( pIter->eType!=RBU_PK_VTAB || pIter->abIndexed==0 ); } - return rc; + return p->rc; } -static int Reinsert( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight +/* +** This function constructs and returns a pointer to a nul-terminated +** string containing some SQL clause or list based on one or more of the +** column names currently stored in the pIter->azTblCol[] array. +*/ +static char *rbuObjIterGetCollist( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter /* Object iterator for column names */ ){ - int *aOrder; - int *aSpare; - RtreeCell *aCell; - RtreeDValue *aDistance; - int nCell; - RtreeDValue aCenterCoord[RTREE_MAX_DIMENSIONS]; - int iDim; - int ii; - int rc = SQLITE_OK; - int n; - - memset(aCenterCoord, 0, sizeof(RtreeDValue)*RTREE_MAX_DIMENSIONS); + char *zList = 0; + const char *zSep = ""; + int i; + for(i=0; i<pIter->nTblCol; i++){ + const char *z = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"", zList, zSep, z); + zSep = ", "; + } + return zList; +} - nCell = NCELL(pNode)+1; - n = (nCell+1)&(~1); +/* +** This function is used to create a SELECT list (the list of SQL +** expressions that follows a SELECT keyword) for a SELECT statement +** used to read from an data_xxx or rbu_tmp_xxx table while updating the +** index object currently indicated by the iterator object passed as the +** second argument. A "PRAGMA index_xinfo = <idxname>" statement is used +** to obtain the required information. +** +** If the index is of the following form: +** +** CREATE INDEX i1 ON t1(c, b COLLATE nocase); +** +** and "t1" is a table with an explicit INTEGER PRIMARY KEY column +** "ipk", the returned string is: +** +** "`c` COLLATE 'BINARY', `b` COLLATE 'NOCASE', `ipk` COLLATE 'BINARY'" +** +** As well as the returned string, three other malloc'd strings are +** returned via output parameters. As follows: +** +** pzImposterCols: ... +** pzImposterPk: ... +** pzWhere: ... +*/ +static char *rbuObjIterGetIndexCols( + sqlite3rbu *p, /* RBU object */ + RbuObjIter *pIter, /* Object iterator for column names */ + char **pzImposterCols, /* OUT: Columns for imposter table */ + char **pzImposterPk, /* OUT: Imposter PK clause */ + char **pzWhere, /* OUT: WHERE clause */ + int *pnBind /* OUT: Trbul number of columns */ +){ + int rc = p->rc; /* Error code */ + int rc2; /* sqlite3_finalize() return code */ + char *zRet = 0; /* String to return */ + char *zImpCols = 0; /* String to return via *pzImposterCols */ + char *zImpPK = 0; /* String to return via *pzImposterPK */ + char *zWhere = 0; /* String to return via *pzWhere */ + int nBind = 0; /* Value to return via *pnBind */ + const char *zCom = ""; /* Set to ", " later on */ + const char *zAnd = ""; /* Set to " AND " later on */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = ? */ - /* Allocate the buffers used by this operation. The allocation is - ** relinquished before this function returns. - */ - aCell = (RtreeCell *)sqlite3_malloc(n * ( - sizeof(RtreeCell) + /* aCell array */ - sizeof(int) + /* aOrder array */ - sizeof(int) + /* aSpare array */ - sizeof(RtreeDValue) /* aDistance array */ - )); - if( !aCell ){ - return SQLITE_NOMEM; + if( rc==SQLITE_OK ){ + assert( p->zErrmsg==0 ); + rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", pIter->zIdx) + ); } - aOrder = (int *)&aCell[n]; - aSpare = (int *)&aOrder[n]; - aDistance = (RtreeDValue *)&aSpare[n]; - for(ii=0; ii<nCell; ii++){ - if( ii==(nCell-1) ){ - memcpy(&aCell[ii], pCell, sizeof(RtreeCell)); + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + const char *zCol; + const char *zType; + + if( iCid<0 ){ + /* An integer primary key. If the table has an explicit IPK, use + ** its name. Otherwise, use "rbu_rowid". */ + if( pIter->eType==RBU_PK_IPK ){ + int i; + for(i=0; pIter->abTblPk[i]==0; i++); + assert( i<pIter->nTblCol ); + zCol = pIter->azTblCol[i]; + }else{ + zCol = "rbu_rowid"; + } + zType = "INTEGER"; }else{ - nodeGetCell(pRtree, pNode, ii, &aCell[ii]); + zCol = pIter->azTblCol[iCid]; + zType = pIter->azTblType[iCid]; } - aOrder[ii] = ii; - for(iDim=0; iDim<pRtree->nDim; iDim++){ - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2]); - aCenterCoord[iDim] += DCOORD(aCell[ii].aCoord[iDim*2+1]); + + zRet = sqlite3_mprintf("%z%s\"%w\" COLLATE %Q", zRet, zCom, zCol, zCollate); + if( pIter->bUnique==0 || sqlite3_column_int(pXInfo, 5) ){ + const char *zOrder = (bDesc ? " DESC" : ""); + zImpPK = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\"%s", + zImpPK, zCom, nBind, zCol, zOrder + ); } - } - for(iDim=0; iDim<pRtree->nDim; iDim++){ - aCenterCoord[iDim] = (aCenterCoord[iDim]/(nCell*(RtreeDValue)2)); + zImpCols = sqlite3_mprintf("%z%s\"rbu_imp_%d%w\" %s COLLATE %Q", + zImpCols, zCom, nBind, zCol, zType, zCollate + ); + zWhere = sqlite3_mprintf( + "%z%s\"rbu_imp_%d%w\" IS ?", zWhere, zAnd, nBind, zCol + ); + if( zRet==0 || zImpPK==0 || zImpCols==0 || zWhere==0 ) rc = SQLITE_NOMEM; + zCom = ", "; + zAnd = " AND "; + nBind++; } - for(ii=0; ii<nCell; ii++){ - aDistance[ii] = RTREE_ZERO; - for(iDim=0; iDim<pRtree->nDim; iDim++){ - RtreeDValue coord = (DCOORD(aCell[ii].aCoord[iDim*2+1]) - - DCOORD(aCell[ii].aCoord[iDim*2])); - aDistance[ii] += (coord-aCenterCoord[iDim])*(coord-aCenterCoord[iDim]); - } + rc2 = sqlite3_finalize(pXInfo); + if( rc==SQLITE_OK ) rc = rc2; + + if( rc!=SQLITE_OK ){ + sqlite3_free(zRet); + sqlite3_free(zImpCols); + sqlite3_free(zImpPK); + sqlite3_free(zWhere); + zRet = 0; + zImpCols = 0; + zImpPK = 0; + zWhere = 0; + p->rc = rc; } - SortByDistance(aOrder, nCell, aDistance, aSpare); - nodeZero(pRtree, pNode); + *pzImposterCols = zImpCols; + *pzImposterPk = zImpPK; + *pzWhere = zWhere; + *pnBind = nBind; + return zRet; +} - for(ii=0; rc==SQLITE_OK && ii<(nCell-(RTREE_MINCELLS(pRtree)+1)); ii++){ - RtreeCell *p = &aCell[aOrder[ii]]; - nodeInsertCell(pRtree, pNode, p); - if( p->iRowid==pCell->iRowid ){ - if( iHeight==0 ){ - rc = rowidWrite(pRtree, p->iRowid, pNode->iNode); +/* +** Assuming the current table columns are "a", "b" and "c", and the zObj +** paramter is passed "old", return a string of the form: +** +** "old.a, old.b, old.b" +** +** With the column names escaped. +** +** For tables with implicit rowids - RBU_PK_EXTERNAL and RBU_PK_NONE, append +** the text ", old._rowid_" to the returned value. +*/ +static char *rbuObjIterGetOldlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zObj +){ + char *zList = 0; + if( p->rc==SQLITE_OK && pIter->abIndexed ){ + const char *zS = ""; + int i; + for(i=0; i<pIter->nTblCol; i++){ + if( pIter->abIndexed[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = sqlite3_mprintf("%z%s%s.\"%w\"", zList, zS, zObj, zCol); }else{ - rc = parentWrite(pRtree, p->iRowid, pNode->iNode); + zList = sqlite3_mprintf("%z%sNULL", zList, zS); } - } - } - if( rc==SQLITE_OK ){ - rc = fixBoundingBox(pRtree, pNode); - } - for(; rc==SQLITE_OK && ii<nCell; ii++){ - /* Find a node to store this cell in. pNode->iNode currently contains - ** the height of the sub-tree headed by the cell. - */ - RtreeNode *pInsert; - RtreeCell *p = &aCell[aOrder[ii]]; - rc = ChooseLeaf(pRtree, p, iHeight, &pInsert); - if( rc==SQLITE_OK ){ - int rc2; - rc = rtreeInsertCell(pRtree, pInsert, p, iHeight); - rc2 = nodeRelease(pRtree, pInsert); - if( rc==SQLITE_OK ){ - rc = rc2; + zS = ", "; + if( zList==0 ){ + p->rc = SQLITE_NOMEM; + break; } } - } - sqlite3_free(aCell); - return rc; + /* For a table with implicit rowids, append "old._rowid_" to the list. */ + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "%z, %s._rowid_", zList, zObj); + } + } + return zList; } /* -** Insert cell pCell into node pNode. Node pNode is the head of a -** subtree iHeight high (leaf nodes have iHeight==0). +** Return an expression that can be used in a WHERE clause to match the +** primary key of the current table. For example, if the table is: +** +** CREATE TABLE t1(a, b, c, PRIMARY KEY(b, c)); +** +** Return the string: +** +** "b = ?1 AND c = ?2" */ -static int rtreeInsertCell( - Rtree *pRtree, - RtreeNode *pNode, - RtreeCell *pCell, - int iHeight +static char *rbuObjIterGetWhere( + sqlite3rbu *p, + RbuObjIter *pIter ){ - int rc = SQLITE_OK; - if( iHeight>0 ){ - RtreeNode *pChild = nodeHashLookup(pRtree, pCell->iRowid); - if( pChild ){ - nodeRelease(pRtree, pChild->pParent); - nodeReference(pNode); - pChild->pParent = pNode; - } - } - if( nodeInsertCell(pRtree, pNode, pCell) ){ - if( iHeight<=pRtree->iReinsertHeight || pNode->iNode==1){ - rc = SplitNode(pRtree, pNode, pCell, iHeight); - }else{ - pRtree->iReinsertHeight = iHeight; - rc = Reinsert(pRtree, pNode, pCell, iHeight); + char *zList = 0; + if( pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE ){ + zList = rbuMPrintf(p, "_rowid_ = ?%d", pIter->nTblCol+1); + }else if( pIter->eType==RBU_PK_EXTERNAL ){ + const char *zSep = ""; + int i; + for(i=0; i<pIter->nTblCol; i++){ + if( pIter->abTblPk[i] ){ + zList = rbuMPrintf(p, "%z%sc%d=?%d", zList, zSep, i, i+1); + zSep = " AND "; + } } + zList = rbuMPrintf(p, + "_rowid_ = (SELECT id FROM rbu_imposter2 WHERE %z)", zList + ); + }else{ - rc = AdjustTree(pRtree, pNode, pCell); - if( rc==SQLITE_OK ){ - if( iHeight==0 ){ - rc = rowidWrite(pRtree, pCell->iRowid, pNode->iNode); - }else{ - rc = parentWrite(pRtree, pCell->iRowid, pNode->iNode); + const char *zSep = ""; + int i; + for(i=0; i<pIter->nTblCol; i++){ + if( pIter->abTblPk[i] ){ + const char *zCol = pIter->azTblCol[i]; + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", zList, zSep, zCol, i+1); + zSep = " AND "; } } } - return rc; + return zList; } -static int reinsertNodeContent(Rtree *pRtree, RtreeNode *pNode){ - int ii; - int rc = SQLITE_OK; - int nCell = NCELL(pNode); +/* +** The SELECT statement iterating through the keys for the current object +** (p->objiter.pSelect) currently points to a valid row. However, there +** is something wrong with the rbu_control value in the rbu_control value +** stored in the (p->nCol+1)'th column. Set the error code and error message +** of the RBU handle to something reflecting this. +*/ +static void rbuBadControlError(sqlite3rbu *p){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("invalid rbu_control value"); +} - for(ii=0; rc==SQLITE_OK && ii<nCell; ii++){ - RtreeNode *pInsert; - RtreeCell cell; - nodeGetCell(pRtree, pNode, ii, &cell); - /* Find a node to store this cell in. pNode->iNode currently contains - ** the height of the sub-tree headed by the cell. - */ - rc = ChooseLeaf(pRtree, &cell, (int)pNode->iNode, &pInsert); - if( rc==SQLITE_OK ){ - int rc2; - rc = rtreeInsertCell(pRtree, pInsert, &cell, (int)pNode->iNode); - rc2 = nodeRelease(pRtree, pInsert); - if( rc==SQLITE_OK ){ - rc = rc2; +/* +** Return a nul-terminated string containing the comma separated list of +** assignments that should be included following the "SET" keyword of +** an UPDATE statement used to update the table object that the iterator +** passed as the second argument currently points to if the rbu_control +** column of the data_xxx table entry is set to zMask. +** +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. +*/ +static char *rbuObjIterGetSetlist( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zMask +){ + char *zList = 0; + if( p->rc==SQLITE_OK ){ + int i; + + if( strlen(zMask)!=pIter->nTblCol ){ + rbuBadControlError(p); + }else{ + const char *zSep = ""; + for(i=0; i<pIter->nTblCol; i++){ + char c = zMask[pIter->aiSrcOrder[i]]; + if( c=='x' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=?%d", + zList, zSep, pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } + if( c=='d' ){ + zList = rbuMPrintf(p, "%z%s\"%w\"=rbu_delta(\"%w\", ?%d)", + zList, zSep, pIter->azTblCol[i], pIter->azTblCol[i], i+1 + ); + zSep = ", "; + } } } } - return rc; + return zList; } /* -** Select a currently unused rowid for a new r-tree record. +** Return a nul-terminated string consisting of nByte comma separated +** "?" expressions. For example, if nByte is 3, return a pointer to +** a buffer containing the string "?,?,?". +** +** The memory for the returned string is obtained from sqlite3_malloc(). +** It is the responsibility of the caller to eventually free it using +** sqlite3_free(). +** +** If an OOM error is encountered when allocating space for the new +** string, an error code is left in the rbu handle passed as the first +** argument and NULL is returned. Or, if an error has already occurred +** when this function is called, NULL is returned immediately, without +** attempting the allocation or modifying the stored error code. */ -static int newRowid(Rtree *pRtree, i64 *piRowid){ - int rc; - sqlite3_bind_null(pRtree->pWriteRowid, 1); - sqlite3_bind_null(pRtree->pWriteRowid, 2); - sqlite3_step(pRtree->pWriteRowid); - rc = sqlite3_reset(pRtree->pWriteRowid); - *piRowid = sqlite3_last_insert_rowid(pRtree->db); - return rc; +static char *rbuObjIterGetBindlist(sqlite3rbu *p, int nBind){ + char *zRet = 0; + int nByte = nBind*2 + 1; + + zRet = (char*)rbuMalloc(p, nByte); + if( zRet ){ + int i; + for(i=0; i<nBind; i++){ + zRet[i*2] = '?'; + zRet[i*2+1] = (i+1==nBind) ? '\0' : ','; + } + } + return zRet; } /* -** Remove the entry with rowid=iDelete from the r-tree structure. +** The iterator currently points to a table (not index) of type +** RBU_PK_WITHOUT_ROWID. This function creates the PRIMARY KEY +** declaration for the corresponding imposter table. For example, +** if the iterator points to a table created as: +** +** CREATE TABLE t1(a, b, c, PRIMARY KEY(b, a DESC)) WITHOUT ROWID +** +** this function returns: +** +** PRIMARY KEY("b", "a" DESC) */ -static int rtreeDeleteRowid(Rtree *pRtree, sqlite3_int64 iDelete){ - int rc; /* Return code */ - RtreeNode *pLeaf = 0; /* Leaf node containing record iDelete */ - int iCell; /* Index of iDelete cell in pLeaf */ - RtreeNode *pRoot; /* Root node of rtree structure */ - - - /* Obtain a reference to the root node to initialize Rtree.iDepth */ - rc = nodeAcquire(pRtree, 1, 0, &pRoot); - - /* Obtain a reference to the leaf node that contains the entry - ** about to be deleted. - */ - if( rc==SQLITE_OK ){ - rc = findLeafNode(pRtree, iDelete, &pLeaf, 0); - } - - /* Delete the cell in question from the leaf node. */ - if( rc==SQLITE_OK ){ - int rc2; - rc = nodeRowidIndex(pRtree, pLeaf, iDelete, &iCell); - if( rc==SQLITE_OK ){ - rc = deleteCell(pRtree, pLeaf, iCell, 0); +static char *rbuWithoutRowidPK(sqlite3rbu *p, RbuObjIter *pIter){ + char *z = 0; + assert( pIter->zIdx==0 ); + if( p->rc==SQLITE_OK ){ + const char *zSep = "PRIMARY KEY("; + sqlite3_stmt *pXList = 0; /* PRAGMA index_list = (pIter->zTbl) */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA index_xinfo = <pk-index> */ + + p->rc = prepareFreeAndCollectError(p->dbMain, &pXList, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_list = %Q", pIter->zTbl) + ); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXList) ){ + const char *zOrig = (const char*)sqlite3_column_text(pXList,3); + if( zOrig && strcmp(zOrig, "pk")==0 ){ + const char *zIdx = (const char*)sqlite3_column_text(pXList,1); + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); + } + break; + } } - rc2 = nodeRelease(pRtree, pLeaf); - if( rc==SQLITE_OK ){ - rc = rc2; + rbuFinalize(p, pXList); + + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + if( sqlite3_column_int(pXInfo, 5) ){ + /* int iCid = sqlite3_column_int(pXInfo, 0); */ + const char *zCol = (const char*)sqlite3_column_text(pXInfo, 2); + const char *zDesc = sqlite3_column_int(pXInfo, 3) ? " DESC" : ""; + z = rbuMPrintf(p, "%z%s\"%w\"%s", z, zSep, zCol, zDesc); + zSep = ", "; + } } + z = rbuMPrintf(p, "%z)", z); + rbuFinalize(p, pXInfo); } + return z; +} - /* Delete the corresponding entry in the <rtree>_rowid table. */ - if( rc==SQLITE_OK ){ - sqlite3_bind_int64(pRtree->pDeleteRowid, 1, iDelete); - sqlite3_step(pRtree->pDeleteRowid); - rc = sqlite3_reset(pRtree->pDeleteRowid); - } +/* +** This function creates the second imposter table used when writing to +** a table b-tree where the table has an external primary key. If the +** iterator passed as the second argument does not currently point to +** a table (not index) with an external primary key, this function is a +** no-op. +** +** Assuming the iterator does point to a table with an external PK, this +** function creates a WITHOUT ROWID imposter table named "rbu_imposter2" +** used to access that PK index. For example, if the target table is +** declared as follows: +** +** CREATE TABLE t1(a, b TEXT, c REAL, PRIMARY KEY(b, c)); +** +** then the imposter table schema is: +** +** CREATE TABLE rbu_imposter2(c1 TEXT, c2 REAL, id INTEGER) WITHOUT ROWID; +** +*/ +static void rbuCreateImposterTable2(sqlite3rbu *p, RbuObjIter *pIter){ + if( p->rc==SQLITE_OK && pIter->eType==RBU_PK_EXTERNAL ){ + int tnum = pIter->iPkTnum; /* Root page of PK index */ + sqlite3_stmt *pQuery = 0; /* SELECT name ... WHERE rootpage = $tnum */ + const char *zIdx = 0; /* Name of PK index */ + sqlite3_stmt *pXInfo = 0; /* PRAGMA main.index_xinfo = $zIdx */ + const char *zComma = ""; + char *zCols = 0; /* Used to build up list of table cols */ + char *zPk = 0; /* Used to build up table PK declaration */ - /* Check if the root node now has exactly one child. If so, remove - ** it, schedule the contents of the child for reinsertion and - ** reduce the tree height by one. - ** - ** This is equivalent to copying the contents of the child into - ** the root node (the operation that Gutman's paper says to perform - ** in this scenario). - */ - if( rc==SQLITE_OK && pRtree->iDepth>0 && NCELL(pRoot)==1 ){ - int rc2; - RtreeNode *pChild; - i64 iChild = nodeGetRowid(pRtree, pRoot, 0); - rc = nodeAcquire(pRtree, iChild, pRoot, &pChild); - if( rc==SQLITE_OK ){ - rc = removeNode(pRtree, pChild, pRtree->iDepth-1); + /* Figure out the name of the primary key index for the current table. + ** This is needed for the argument to "PRAGMA index_xinfo". Set + ** zIdx to point to a nul-terminated string containing this name. */ + p->rc = prepareAndCollectError(p->dbMain, &pQuery, &p->zErrmsg, + "SELECT name FROM sqlite_master WHERE rootpage = ?" + ); + if( p->rc==SQLITE_OK ){ + sqlite3_bind_int(pQuery, 1, tnum); + if( SQLITE_ROW==sqlite3_step(pQuery) ){ + zIdx = (const char*)sqlite3_column_text(pQuery, 0); + } } - rc2 = nodeRelease(pRtree, pChild); - if( rc==SQLITE_OK ) rc = rc2; - if( rc==SQLITE_OK ){ - pRtree->iDepth--; - writeInt16(pRoot->zData, pRtree->iDepth); - pRoot->isDirty = 1; + if( zIdx ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pXInfo, &p->zErrmsg, + sqlite3_mprintf("PRAGMA main.index_xinfo = %Q", zIdx) + ); } - } + rbuFinalize(p, pQuery); - /* Re-insert the contents of any underfull nodes removed from the tree. */ - for(pLeaf=pRtree->pDeleted; pLeaf; pLeaf=pRtree->pDeleted){ - if( rc==SQLITE_OK ){ - rc = reinsertNodeContent(pRtree, pLeaf); + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pXInfo) ){ + int bKey = sqlite3_column_int(pXInfo, 5); + if( bKey ){ + int iCid = sqlite3_column_int(pXInfo, 1); + int bDesc = sqlite3_column_int(pXInfo, 3); + const char *zCollate = (const char*)sqlite3_column_text(pXInfo, 4); + zCols = rbuMPrintf(p, "%z%sc%d %s COLLATE %s", zCols, zComma, + iCid, pIter->azTblType[iCid], zCollate + ); + zPk = rbuMPrintf(p, "%z%sc%d%s", zPk, zComma, iCid, bDesc?" DESC":""); + zComma = ", "; + } } - pRtree->pDeleted = pLeaf->pNext; - sqlite3_free(pLeaf); - } + zCols = rbuMPrintf(p, "%z, id INTEGER", zCols); + rbuFinalize(p, pXInfo); - /* Release the reference to the root node. */ - if( rc==SQLITE_OK ){ - rc = nodeRelease(pRtree, pRoot); - }else{ - nodeRelease(pRtree, pRoot); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE rbu_imposter2(%z, PRIMARY KEY(%z)) WITHOUT ROWID", + zCols, zPk + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); } - - return rc; } /* -** Rounding constants for float->double conversion. -*/ -#define RNDTOWARDS (1.0 - 1.0/8388608.0) /* Round towards zero */ -#define RNDAWAY (1.0 + 1.0/8388608.0) /* Round away from zero */ +** If an error has already occurred when this function is called, it +** immediately returns zero (without doing any work). Or, if an error +** occurs during the execution of this function, it sets the error code +** in the sqlite3rbu object indicated by the first argument and returns +** zero. +** +** The iterator passed as the second argument is guaranteed to point to +** a table (not an index) when this function is called. This function +** attempts to create any imposter table required to write to the main +** table b-tree of the table before returning. Non-zero is returned if +** an imposter table are created, or zero otherwise. +** +** An imposter table is required in all cases except RBU_PK_VTAB. Only +** virtual tables are written to directly. The imposter table has the +** same schema as the actual target table (less any UNIQUE constraints). +** More precisely, the "same schema" means the same columns, types, +** collation sequences. For tables that do not have an external PRIMARY +** KEY, it also means the same PRIMARY KEY declaration. +*/ +static void rbuCreateImposterTable(sqlite3rbu *p, RbuObjIter *pIter){ + if( p->rc==SQLITE_OK && pIter->eType!=RBU_PK_VTAB ){ + int tnum = pIter->iTnum; + const char *zComma = ""; + char *zSql = 0; + int iCol; + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + + for(iCol=0; p->rc==SQLITE_OK && iCol<pIter->nTblCol; iCol++){ + const char *zPk = ""; + const char *zCol = pIter->azTblCol[iCol]; + const char *zColl = 0; + + p->rc = sqlite3_table_column_metadata( + p->dbMain, "main", pIter->zTbl, zCol, 0, &zColl, 0, 0, 0 + ); + + if( pIter->eType==RBU_PK_IPK && pIter->abTblPk[iCol] ){ + /* If the target table column is an "INTEGER PRIMARY KEY", add + ** "PRIMARY KEY" to the imposter table column declaration. */ + zPk = "PRIMARY KEY "; + } + zSql = rbuMPrintf(p, "%z%s\"%w\" %s %sCOLLATE %s%s", + zSql, zComma, zCol, pIter->azTblType[iCol], zPk, zColl, + (pIter->abNotNull[iCol] ? " NOT NULL" : "") + ); + zComma = ", "; + } + + if( pIter->eType==RBU_PK_WITHOUT_ROWID ){ + char *zPk = rbuWithoutRowidPK(p, pIter); + if( zPk ){ + zSql = rbuMPrintf(p, "%z, %z", zSql, zPk); + } + } + + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1, tnum); + rbuMPrintfExec(p, p->dbMain, "CREATE TABLE \"rbu_imp_%w\"(%z)%s", + pIter->zTbl, zSql, + (pIter->eType==RBU_PK_WITHOUT_ROWID ? " WITHOUT ROWID" : "") + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + } +} -#if !defined(SQLITE_RTREE_INT_ONLY) /* -** Convert an sqlite3_value into an RtreeValue (presumably a float) -** while taking care to round toward negative or positive, respectively. +** Prepare a statement used to insert rows into the "rbu_tmp_xxx" table. +** Specifically a statement of the form: +** +** INSERT INTO rbu_tmp_xxx VALUES(?, ?, ? ...); +** +** The number of bound variables is equal to the number of columns in +** the target table, plus one (for the rbu_control column), plus one more +** (for the rbu_rowid column) if the target table is an implicit IPK or +** virtual table. */ -static RtreeValue rtreeValueDown(sqlite3_value *v){ - double d = sqlite3_value_double(v); - float f = (float)d; - if( f>d ){ - f = (float)(d*(d<0 ? RNDAWAY : RNDTOWARDS)); +static void rbuObjIterPrepareTmpInsert( + sqlite3rbu *p, + RbuObjIter *pIter, + const char *zCollist, + const char *zRbuRowid +){ + int bRbuRowid = (pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE); + char *zBind = rbuObjIterGetBindlist(p, pIter->nTblCol + 1 + bRbuRowid); + if( zBind ){ + assert( pIter->pTmpInsert==0 ); + p->rc = prepareFreeAndCollectError( + p->dbRbu, &pIter->pTmpInsert, &p->zErrmsg, sqlite3_mprintf( + "INSERT INTO %s.'rbu_tmp_%q'(rbu_control,%s%s) VALUES(%z)", + p->zStateDb, pIter->zTbl, zCollist, zRbuRowid, zBind + )); } - return f; } -static RtreeValue rtreeValueUp(sqlite3_value *v){ - double d = sqlite3_value_double(v); - float f = (float)d; - if( f<d ){ - f = (float)(d*(d<0 ? RNDTOWARDS : RNDAWAY)); + +static void rbuTmpInsertFunc( + sqlite3_context *pCtx, + int nVal, + sqlite3_value **apVal +){ + sqlite3rbu *p = sqlite3_user_data(pCtx); + int rc = SQLITE_OK; + int i; + + for(i=0; rc==SQLITE_OK && i<nVal; i++){ + rc = sqlite3_bind_value(p->objiter.pTmpInsert, i+1, apVal[i]); + } + if( rc==SQLITE_OK ){ + sqlite3_step(p->objiter.pTmpInsert); + rc = sqlite3_reset(p->objiter.pTmpInsert); } - return f; -} -#endif /* !defined(SQLITE_RTREE_INT_ONLY) */ + if( rc!=SQLITE_OK ){ + sqlite3_result_error_code(pCtx, rc); + } +} /* -** The xUpdate method for rtree module virtual tables. +** Ensure that the SQLite statement handles required to update the +** target database object currently indicated by the iterator passed +** as the second argument are available. */ -static int rtreeUpdate( - sqlite3_vtab *pVtab, - int nData, - sqlite3_value **azData, - sqlite_int64 *pRowid +static int rbuObjIterPrepareAll( + sqlite3rbu *p, + RbuObjIter *pIter, + int nOffset /* Add "LIMIT -1 OFFSET $nOffset" to SELECT */ ){ - Rtree *pRtree = (Rtree *)pVtab; - int rc = SQLITE_OK; - RtreeCell cell; /* New cell to insert if nData>1 */ - int bHaveRowid = 0; /* Set to 1 after new rowid is determined */ + assert( pIter->bCleanup==0 ); + if( pIter->pSelect==0 && rbuObjIterCacheTableInfo(p, pIter)==SQLITE_OK ){ + const int tnum = pIter->iTnum; + char *zCollist = 0; /* List of indexed columns */ + char **pz = &p->zErrmsg; + const char *zIdx = pIter->zIdx; + char *zLimit = 0; - rtreeReference(pRtree); - assert(nData>=1); + if( nOffset ){ + zLimit = sqlite3_mprintf(" LIMIT -1 OFFSET %d", nOffset); + if( !zLimit ) p->rc = SQLITE_NOMEM; + } - cell.iRowid = 0; /* Used only to suppress a compiler warning */ + if( zIdx ){ + const char *zTbl = pIter->zTbl; + char *zImposterCols = 0; /* Columns for imposter table */ + char *zImposterPK = 0; /* Primary key declaration for imposter */ + char *zWhere = 0; /* WHERE clause on PK columns */ + char *zBind = 0; + int nBind = 0; - /* Constraint handling. A write operation on an r-tree table may return - ** SQLITE_CONSTRAINT for two reasons: - ** - ** 1. A duplicate rowid value, or - ** 2. The supplied data violates the "x2>=x1" constraint. - ** - ** In the first case, if the conflict-handling mode is REPLACE, then - ** the conflicting row can be removed before proceeding. In the second - ** case, SQLITE_CONSTRAINT must be returned regardless of the - ** conflict-handling mode specified by the user. - */ - if( nData>1 ){ - int ii; + assert( pIter->eType!=RBU_PK_VTAB ); + zCollist = rbuObjIterGetIndexCols( + p, pIter, &zImposterCols, &zImposterPK, &zWhere, &nBind + ); + zBind = rbuObjIterGetBindlist(p, nBind); + + /* Create the imposter table used to write to this index. */ + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 1); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 1,tnum); + rbuMPrintfExec(p, p->dbMain, + "CREATE TABLE \"rbu_imp_%w\"( %s, PRIMARY KEY( %s ) ) WITHOUT ROWID", + zTbl, zImposterCols, zImposterPK + ); + sqlite3_test_control(SQLITE_TESTCTRL_IMPOSTER, p->dbMain, "main", 0, 0); + + /* Create the statement to insert index entries */ + pIter->nCol = nBind; + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pInsert, &p->zErrmsg, + sqlite3_mprintf("INSERT INTO \"rbu_imp_%w\" VALUES(%s)", zTbl, zBind) + ); + } - /* Populate the cell.aCoord[] array. The first coordinate is azData[3]. - ** - ** NB: nData can only be less than nDim*2+3 if the rtree is mis-declared - ** with "column" that are interpreted as table constraints. - ** Example: CREATE VIRTUAL TABLE bad USING rtree(x,y,CHECK(y>5)); - ** This problem was discovered after years of use, so we silently ignore - ** these kinds of misdeclared tables to avoid breaking any legacy. - */ - assert( nData<=(pRtree->nDim*2 + 3) ); + /* And to delete index entries */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError( + p->dbMain, &pIter->pDelete, &p->zErrmsg, + sqlite3_mprintf("DELETE FROM \"rbu_imp_%w\" WHERE %s", zTbl, zWhere) + ); + } -#ifndef SQLITE_RTREE_INT_ONLY - if( pRtree->eCoordType==RTREE_COORD_REAL32 ){ - for(ii=0; ii<nData-4; ii+=2){ - cell.aCoord[ii].f = rtreeValueDown(azData[ii+3]); - cell.aCoord[ii+1].f = rtreeValueUp(azData[ii+4]); - if( cell.aCoord[ii].f>cell.aCoord[ii+1].f ){ - rc = SQLITE_CONSTRAINT; - goto constraint; + /* Create the SELECT statement to read keys in sorted order */ + if( p->rc==SQLITE_OK ){ + char *zSql; + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' ORDER BY %s%s", + zCollist, p->zStateDb, pIter->zTbl, + zCollist, zLimit + ); + }else{ + zSql = sqlite3_mprintf( + "SELECT %s, rbu_control FROM 'data_%q' " + "WHERE typeof(rbu_control)='integer' AND rbu_control!=1 " + "UNION ALL " + "SELECT %s, rbu_control FROM %s.'rbu_tmp_%q' " + "ORDER BY %s%s", + zCollist, pIter->zTbl, + zCollist, p->zStateDb, pIter->zTbl, + zCollist, zLimit + ); } + p->rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pSelect, pz, zSql); } - }else -#endif - { - for(ii=0; ii<nData-4; ii+=2){ - cell.aCoord[ii].i = sqlite3_value_int(azData[ii+3]); - cell.aCoord[ii+1].i = sqlite3_value_int(azData[ii+4]); - if( cell.aCoord[ii].i>cell.aCoord[ii+1].i ){ - rc = SQLITE_CONSTRAINT; - goto constraint; - } + + sqlite3_free(zImposterCols); + sqlite3_free(zImposterPK); + sqlite3_free(zWhere); + sqlite3_free(zBind); + }else{ + int bRbuRowid = (pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE); + const char *zTbl = pIter->zTbl; /* Table this step applies to */ + const char *zWrite; /* Imposter table name */ + + char *zBindings = rbuObjIterGetBindlist(p, pIter->nTblCol + bRbuRowid); + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zOldlist = rbuObjIterGetOldlist(p, pIter, "old"); + char *zNewlist = rbuObjIterGetOldlist(p, pIter, "new"); + + zCollist = rbuObjIterGetCollist(p, pIter); + pIter->nCol = pIter->nTblCol; + + /* Create the SELECT statement to read keys from data_xxx */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbRbu, &pIter->pSelect, pz, + sqlite3_mprintf( + "SELECT %s, rbu_control%s FROM 'data_%q'%s", + zCollist, (bRbuRowid ? ", rbu_rowid" : ""), zTbl, zLimit + ) + ); } - } - /* If a rowid value was supplied, check if it is already present in - ** the table. If so, the constraint has failed. */ - if( sqlite3_value_type(azData[2])!=SQLITE_NULL ){ - cell.iRowid = sqlite3_value_int64(azData[2]); - if( sqlite3_value_type(azData[0])==SQLITE_NULL - || sqlite3_value_int64(azData[0])!=cell.iRowid - ){ - int steprc; - sqlite3_bind_int64(pRtree->pReadRowid, 1, cell.iRowid); - steprc = sqlite3_step(pRtree->pReadRowid); - rc = sqlite3_reset(pRtree->pReadRowid); - if( SQLITE_ROW==steprc ){ - if( sqlite3_vtab_on_conflict(pRtree->db)==SQLITE_REPLACE ){ - rc = rtreeDeleteRowid(pRtree, cell.iRowid); - }else{ - rc = SQLITE_CONSTRAINT; - goto constraint; - } - } + /* Create the imposter table or tables (if required). */ + rbuCreateImposterTable(p, pIter); + rbuCreateImposterTable2(p, pIter); + zWrite = (pIter->eType==RBU_PK_VTAB ? "" : "rbu_imp_"); + + /* Create the INSERT statement to write to the target PK b-tree */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pInsert, pz, + sqlite3_mprintf( + "INSERT INTO \"%s%w\"(%s%s) VALUES(%s)", + zWrite, zTbl, zCollist, (bRbuRowid ? ", _rowid_" : ""), zBindings + ) + ); } - bHaveRowid = 1; - } - } - /* If azData[0] is not an SQL NULL value, it is the rowid of a - ** record to delete from the r-tree table. The following block does - ** just that. - */ - if( sqlite3_value_type(azData[0])!=SQLITE_NULL ){ - rc = rtreeDeleteRowid(pRtree, sqlite3_value_int64(azData[0])); - } + /* Create the DELETE statement to write to the target PK b-tree */ + if( p->rc==SQLITE_OK ){ + p->rc = prepareFreeAndCollectError(p->dbMain, &pIter->pDelete, pz, + sqlite3_mprintf( + "DELETE FROM \"%s%w\" WHERE %s", zWrite, zTbl, zWhere + ) + ); + } - /* If the azData[] array contains more than one element, elements - ** (azData[2]..azData[argc-1]) contain a new record to insert into - ** the r-tree structure. - */ - if( rc==SQLITE_OK && nData>1 ){ - /* Insert the new record into the r-tree */ - RtreeNode *pLeaf = 0; + if( pIter->abIndexed ){ + const char *zRbuRowid = ""; + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + zRbuRowid = ", rbu_rowid"; + } - /* Figure out the rowid of the new row. */ - if( bHaveRowid==0 ){ - rc = newRowid(pRtree, &cell.iRowid); - } - *pRowid = cell.iRowid; + /* Create the rbu_tmp_xxx table and the triggers to populate it. */ + rbuMPrintfExec(p, p->dbRbu, + "CREATE TABLE IF NOT EXISTS %s.'rbu_tmp_%q' AS " + "SELECT *%s FROM 'data_%q' WHERE 0;" + , p->zStateDb + , zTbl, (pIter->eType==RBU_PK_EXTERNAL ? ", 0 AS rbu_rowid" : "") + , zTbl + ); - if( rc==SQLITE_OK ){ - rc = ChooseLeaf(pRtree, &cell, 0, &pLeaf); - } - if( rc==SQLITE_OK ){ - int rc2; - pRtree->iReinsertHeight = -1; - rc = rtreeInsertCell(pRtree, pLeaf, &cell, 0); - rc2 = nodeRelease(pRtree, pLeaf); - if( rc==SQLITE_OK ){ - rc = rc2; + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_delete_tr BEFORE DELETE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(2, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update1_tr BEFORE UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(2, %s);" + "END;" + + "CREATE TEMP TRIGGER rbu_update2_tr AFTER UPDATE ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(3, %s);" + "END;", + zWrite, zTbl, zOldlist, + zWrite, zTbl, zOldlist, + zWrite, zTbl, zNewlist + ); + + if( pIter->eType==RBU_PK_EXTERNAL || pIter->eType==RBU_PK_NONE ){ + rbuMPrintfExec(p, p->dbMain, + "CREATE TEMP TRIGGER rbu_insert_tr AFTER INSERT ON \"%s%w\" " + "BEGIN " + " SELECT rbu_tmp_insert(0, %s);" + "END;", + zWrite, zTbl, zNewlist + ); + } + + rbuObjIterPrepareTmpInsert(p, pIter, zCollist, zRbuRowid); } + + sqlite3_free(zWhere); + sqlite3_free(zOldlist); + sqlite3_free(zNewlist); + sqlite3_free(zBindings); } + sqlite3_free(zCollist); + sqlite3_free(zLimit); } - -constraint: - rtreeRelease(pRtree); - return rc; + + return p->rc; } /* -** The xRename method for rtree module virtual tables. +** Set output variable *ppStmt to point to an UPDATE statement that may +** be used to update the imposter table for the main table b-tree of the +** table object that pIter currently points to, assuming that the +** rbu_control column of the data_xyz table contains zMask. +** +** If the zMask string does not specify any columns to update, then this +** is not an error. Output variable *ppStmt is set to NULL in this case. */ -static int rtreeRename(sqlite3_vtab *pVtab, const char *zNewName){ - Rtree *pRtree = (Rtree *)pVtab; - int rc = SQLITE_NOMEM; - char *zSql = sqlite3_mprintf( - "ALTER TABLE %Q.'%q_node' RENAME TO \"%w_node\";" - "ALTER TABLE %Q.'%q_parent' RENAME TO \"%w_parent\";" - "ALTER TABLE %Q.'%q_rowid' RENAME TO \"%w_rowid\";" - , pRtree->zDb, pRtree->zName, zNewName - , pRtree->zDb, pRtree->zName, zNewName - , pRtree->zDb, pRtree->zName, zNewName - ); - if( zSql ){ - rc = sqlite3_exec(pRtree->db, zSql, 0, 0, 0); - sqlite3_free(zSql); - } - return rc; -} +static int rbuGetUpdateStmt( + sqlite3rbu *p, /* RBU handle */ + RbuObjIter *pIter, /* Object iterator */ + const char *zMask, /* rbu_control value ('x.x.') */ + sqlite3_stmt **ppStmt /* OUT: UPDATE statement handle */ +){ + RbuUpdateStmt **pp; + RbuUpdateStmt *pUp = 0; + int nUp = 0; -/* -** This function populates the pRtree->nRowEst variable with an estimate -** of the number of rows in the virtual table. If possible, this is based -** on sqlite_stat1 data. Otherwise, use RTREE_DEFAULT_ROWEST. -*/ -static int rtreeQueryStat1(sqlite3 *db, Rtree *pRtree){ - const char *zFmt = "SELECT stat FROM %Q.sqlite_stat1 WHERE tbl = '%q_rowid'"; - char *zSql; - sqlite3_stmt *p; - int rc; - i64 nRow = 0; + /* In case an error occurs */ + *ppStmt = 0; - zSql = sqlite3_mprintf(zFmt, pRtree->zDb, pRtree->zName); - if( zSql==0 ){ - rc = SQLITE_NOMEM; - }else{ - rc = sqlite3_prepare_v2(db, zSql, -1, &p, 0); - if( rc==SQLITE_OK ){ - if( sqlite3_step(p)==SQLITE_ROW ) nRow = sqlite3_column_int64(p, 0); - rc = sqlite3_finalize(p); - }else if( rc!=SQLITE_NOMEM ){ - rc = SQLITE_OK; + /* Search for an existing statement. If one is found, shift it to the front + ** of the LRU queue and return immediately. Otherwise, leave nUp pointing + ** to the number of statements currently in the cache and pUp to the + ** last object in the list. */ + for(pp=&pIter->pRbuUpdate; *pp; pp=&((*pp)->pNext)){ + pUp = *pp; + if( strcmp(pUp->zMask, zMask)==0 ){ + *pp = pUp->pNext; + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; + *ppStmt = pUp->pUpdate; + return SQLITE_OK; } + nUp++; + } + assert( pUp==0 || pUp->pNext==0 ); - if( rc==SQLITE_OK ){ - if( nRow==0 ){ - pRtree->nRowEst = RTREE_DEFAULT_ROWEST; - }else{ - pRtree->nRowEst = MAX(nRow, RTREE_MIN_ROWEST); - } - } - sqlite3_free(zSql); + if( nUp>=SQLITE_RBU_UPDATE_CACHESIZE ){ + for(pp=&pIter->pRbuUpdate; *pp!=pUp; pp=&((*pp)->pNext)); + *pp = 0; + sqlite3_finalize(pUp->pUpdate); + pUp->pUpdate = 0; + }else{ + pUp = (RbuUpdateStmt*)rbuMalloc(p, sizeof(RbuUpdateStmt)+pIter->nTblCol+1); } - return rc; -} + if( pUp ){ + char *zWhere = rbuObjIterGetWhere(p, pIter); + char *zSet = rbuObjIterGetSetlist(p, pIter, zMask); + char *zUpdate = 0; -static sqlite3_module rtreeModule = { - 0, /* iVersion */ - rtreeCreate, /* xCreate - create a table */ - rtreeConnect, /* xConnect - connect to an existing table */ - rtreeBestIndex, /* xBestIndex - Determine search strategy */ - rtreeDisconnect, /* xDisconnect - Disconnect from a table */ - rtreeDestroy, /* xDestroy - Drop a table */ - rtreeOpen, /* xOpen - open a cursor */ - rtreeClose, /* xClose - close a cursor */ - rtreeFilter, /* xFilter - configure scan constraints */ - rtreeNext, /* xNext - advance a cursor */ - rtreeEof, /* xEof */ - rtreeColumn, /* xColumn - read data */ - rtreeRowid, /* xRowid - read data */ - rtreeUpdate, /* xUpdate - write data */ - 0, /* xBegin - begin transaction */ - 0, /* xSync - sync transaction */ - 0, /* xCommit - commit transaction */ - 0, /* xRollback - rollback transaction */ - 0, /* xFindFunction - function overloading */ - rtreeRename, /* xRename - rename the table */ - 0, /* xSavepoint */ - 0, /* xRelease */ - 0 /* xRollbackTo */ -}; + pUp->zMask = (char*)&pUp[1]; + memcpy(pUp->zMask, zMask, pIter->nTblCol); + pUp->pNext = pIter->pRbuUpdate; + pIter->pRbuUpdate = pUp; -static int rtreeSqlInit( - Rtree *pRtree, - sqlite3 *db, - const char *zDb, - const char *zPrefix, - int isCreate -){ - int rc = SQLITE_OK; + if( zSet ){ + const char *zPrefix = ""; - #define N_STATEMENT 9 - static const char *azSql[N_STATEMENT] = { - /* Read and write the xxx_node table */ - "SELECT data FROM '%q'.'%q_node' WHERE nodeno = :1", - "INSERT OR REPLACE INTO '%q'.'%q_node' VALUES(:1, :2)", - "DELETE FROM '%q'.'%q_node' WHERE nodeno = :1", + if( pIter->eType!=RBU_PK_VTAB ) zPrefix = "rbu_imp_"; + zUpdate = sqlite3_mprintf("UPDATE \"%s%w\" SET %s WHERE %s", + zPrefix, pIter->zTbl, zSet, zWhere + ); + p->rc = prepareFreeAndCollectError( + p->dbMain, &pUp->pUpdate, &p->zErrmsg, zUpdate + ); + *ppStmt = pUp->pUpdate; + } + sqlite3_free(zWhere); + sqlite3_free(zSet); + } - /* Read and write the xxx_rowid table */ - "SELECT nodeno FROM '%q'.'%q_rowid' WHERE rowid = :1", - "INSERT OR REPLACE INTO '%q'.'%q_rowid' VALUES(:1, :2)", - "DELETE FROM '%q'.'%q_rowid' WHERE rowid = :1", + return p->rc; +} - /* Read and write the xxx_parent table */ - "SELECT parentnode FROM '%q'.'%q_parent' WHERE nodeno = :1", - "INSERT OR REPLACE INTO '%q'.'%q_parent' VALUES(:1, :2)", - "DELETE FROM '%q'.'%q_parent' WHERE nodeno = :1" - }; - sqlite3_stmt **appStmt[N_STATEMENT]; - int i; +static sqlite3 *rbuOpenDbhandle(sqlite3rbu *p, const char *zName){ + sqlite3 *db = 0; + if( p->rc==SQLITE_OK ){ + const int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_URI; + p->rc = sqlite3_open_v2(zName, &db, flags, p->zVfsName); + if( p->rc ){ + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + sqlite3_close(db); + db = 0; + } + } + return db; +} - pRtree->db = db; +/* +** Open the database handle and attach the RBU database as "rbu". If an +** error occurs, leave an error code and message in the RBU handle. +*/ +static void rbuOpenDatabase(sqlite3rbu *p){ + assert( p->rc==SQLITE_OK ); + assert( p->dbMain==0 && p->dbRbu==0 ); - if( isCreate ){ - char *zCreate = sqlite3_mprintf( -"CREATE TABLE \"%w\".\"%w_node\"(nodeno INTEGER PRIMARY KEY, data BLOB);" -"CREATE TABLE \"%w\".\"%w_rowid\"(rowid INTEGER PRIMARY KEY, nodeno INTEGER);" -"CREATE TABLE \"%w\".\"%w_parent\"(nodeno INTEGER PRIMARY KEY," - " parentnode INTEGER);" -"INSERT INTO '%q'.'%q_node' VALUES(1, zeroblob(%d))", - zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, zDb, zPrefix, pRtree->iNodeSize + p->eStage = 0; + p->dbMain = rbuOpenDbhandle(p, p->zTarget); + p->dbRbu = rbuOpenDbhandle(p, p->zRbu); + + /* If using separate RBU and state databases, attach the state database to + ** the RBU db handle now. */ + if( p->zState ){ + rbuMPrintfExec(p, p->dbRbu, "ATTACH %Q AS stat", p->zState); + memcpy(p->zStateDb, "stat", 4); + }else{ + memcpy(p->zStateDb, "main", 4); + } + + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_create_function(p->dbMain, + "rbu_tmp_insert", -1, SQLITE_UTF8, (void*)p, rbuTmpInsertFunc, 0, 0 ); - if( !zCreate ){ - return SQLITE_NOMEM; - } - rc = sqlite3_exec(db, zCreate, 0, 0, 0); - sqlite3_free(zCreate); - if( rc!=SQLITE_OK ){ - return rc; - } } - appStmt[0] = &pRtree->pReadNode; - appStmt[1] = &pRtree->pWriteNode; - appStmt[2] = &pRtree->pDeleteNode; - appStmt[3] = &pRtree->pReadRowid; - appStmt[4] = &pRtree->pWriteRowid; - appStmt[5] = &pRtree->pDeleteRowid; - appStmt[6] = &pRtree->pReadParent; - appStmt[7] = &pRtree->pWriteParent; - appStmt[8] = &pRtree->pDeleteParent; + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); + } + rbuMPrintfExec(p, p->dbMain, "SELECT * FROM sqlite_master"); - rc = rtreeQueryStat1(db, pRtree); - for(i=0; i<N_STATEMENT && rc==SQLITE_OK; i++){ - char *zSql = sqlite3_mprintf(azSql[i], zDb, zPrefix); - if( zSql ){ - rc = sqlite3_prepare_v2(db, zSql, -1, appStmt[i], 0); - }else{ - rc = SQLITE_NOMEM; - } - sqlite3_free(zSql); + /* Mark the database file just opened as an RBU target database. If + ** this call returns SQLITE_NOTFOUND, then the RBU vfs is not in use. + ** This is an error. */ + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_file_control(p->dbMain, "main", SQLITE_FCNTL_RBU, (void*)p); } - return rc; + if( p->rc==SQLITE_NOTFOUND ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("rbu vfs not found"); + } } /* -** The second argument to this function contains the text of an SQL statement -** that returns a single integer value. The statement is compiled and executed -** using database connection db. If successful, the integer value returned -** is written to *piVal and SQLITE_OK returned. Otherwise, an SQLite error -** code is returned and the value of *piVal after returning is not defined. +** This routine is a copy of the sqlite3FileSuffix3() routine from the core. +** It is a no-op unless SQLITE_ENABLE_8_3_NAMES is defined. +** +** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database +** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and +** if filename in z[] has a suffix (a.k.a. "extension") that is longer than +** three characters, then shorten the suffix on z[] to be the last three +** characters of the original suffix. +** +** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always +** do the suffix shortening regardless of URI parameter. +** +** Examples: +** +** test.db-journal => test.nal +** test.db-wal => test.wal +** test.db-shm => test.shm +** test.db-mj7f3319fa => test.9fa */ -static int getIntFromStmt(sqlite3 *db, const char *zSql, int *piVal){ - int rc = SQLITE_NOMEM; - if( zSql ){ - sqlite3_stmt *pStmt = 0; - rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); - if( rc==SQLITE_OK ){ - if( SQLITE_ROW==sqlite3_step(pStmt) ){ - *piVal = sqlite3_column_int(pStmt, 0); - } - rc = sqlite3_finalize(pStmt); +static void rbuFileSuffix3(const char *zBase, char *z){ +#ifdef SQLITE_ENABLE_8_3_NAMES +#if SQLITE_ENABLE_8_3_NAMES<2 + if( sqlite3_uri_boolean(zBase, "8_3_names", 0) ) +#endif + { + int i, sz; + sz = sqlite3Strlen30(z); + for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} + if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); + } +#endif +} + +/* +** Return the current wal-index header checksum for the target database +** as a 64-bit integer. +** +** The checksum is store in the first page of xShmMap memory as an 8-byte +** blob starting at byte offset 40. +*/ +static i64 rbuShmChecksum(sqlite3rbu *p){ + i64 iRet = 0; + if( p->rc==SQLITE_OK ){ + sqlite3_file *pDb = p->pTargetFd->pReal; + u32 volatile *ptr; + p->rc = pDb->pMethods->xShmMap(pDb, 0, 32*1024, 0, (void volatile**)&ptr); + if( p->rc==SQLITE_OK ){ + iRet = ((i64)ptr[10] << 32) + ptr[11]; } } - return rc; + return iRet; } /* -** This function is called from within the xConnect() or xCreate() method to -** determine the node-size used by the rtree table being created or connected -** to. If successful, pRtree->iNodeSize is populated and SQLITE_OK returned. -** Otherwise, an SQLite error code is returned. +** This function is called as part of initializing or reinitializing an +** incremental checkpoint. ** -** If this function is being called as part of an xConnect(), then the rtree -** table already exists. In this case the node-size is determined by inspecting -** the root node of the tree. +** It populates the sqlite3rbu.aFrame[] array with the set of +** (wal frame -> db page) copy operations required to checkpoint the +** current wal file, and obtains the set of shm locks required to safely +** perform the copy operations directly on the file-system. ** -** Otherwise, for an xCreate(), use 64 bytes less than the database page-size. -** This ensures that each node is stored on a single database page. If the -** database page-size is so large that more than RTREE_MAXCELLS entries -** would fit in a single node, use a smaller node-size. +** If argument pState is not NULL, then the incremental checkpoint is +** being resumed. In this case, if the checksum of the wal-index-header +** following recovery is not the same as the checksum saved in the RbuState +** object, then the rbu handle is set to DONE state. This occurs if some +** other client appends a transaction to the wal file in the middle of +** an incremental checkpoint. */ -static int getNodeSize( - sqlite3 *db, /* Database handle */ - Rtree *pRtree, /* Rtree handle */ - int isCreate, /* True for xCreate, false for xConnect */ - char **pzErr /* OUT: Error message, if any */ -){ - int rc; - char *zSql; - if( isCreate ){ - int iPageSize = 0; - zSql = sqlite3_mprintf("PRAGMA %Q.page_size", pRtree->zDb); - rc = getIntFromStmt(db, zSql, &iPageSize); - if( rc==SQLITE_OK ){ - pRtree->iNodeSize = iPageSize-64; - if( (4+pRtree->nBytesPerCell*RTREE_MAXCELLS)<pRtree->iNodeSize ){ - pRtree->iNodeSize = 4+pRtree->nBytesPerCell*RTREE_MAXCELLS; - } - }else{ - *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); - } - }else{ - zSql = sqlite3_mprintf( - "SELECT length(data) FROM '%q'.'%q_node' WHERE nodeno = 1", - pRtree->zDb, pRtree->zName - ); - rc = getIntFromStmt(db, zSql, &pRtree->iNodeSize); - if( rc!=SQLITE_OK ){ - *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); +static void rbuSetupCheckpoint(sqlite3rbu *p, RbuState *pState){ + + /* If pState is NULL, then the wal file may not have been opened and + ** recovered. Running a read-statement here to ensure that doing so + ** does not interfere with the "capture" process below. */ + if( pState==0 ){ + p->eStage = 0; + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbMain, "SELECT * FROM sqlite_master", 0, 0, 0); } } - sqlite3_free(zSql); - return rc; + /* Assuming no error has occurred, run a "restart" checkpoint with the + ** sqlite3rbu.eStage variable set to CAPTURE. This turns on the following + ** special behaviour in the rbu VFS: + ** + ** * If the exclusive shm WRITER or READ0 lock cannot be obtained, + ** the checkpoint fails with SQLITE_BUSY (normally SQLite would + ** proceed with running a passive checkpoint instead of failing). + ** + ** * Attempts to read from the *-wal file or write to the database file + ** do not perform any IO. Instead, the frame/page combinations that + ** would be read/written are recorded in the sqlite3rbu.aFrame[] + ** array. + ** + ** * Calls to xShmLock(UNLOCK) to release the exclusive shm WRITER, + ** READ0 and CHECKPOINT locks taken as part of the checkpoint are + ** no-ops. These locks will not be released until the connection + ** is closed. + ** + ** * Attempting to xSync() the database file causes an SQLITE_INTERNAL + ** error. + ** + ** As a result, unless an error (i.e. OOM or SQLITE_BUSY) occurs, the + ** checkpoint below fails with SQLITE_INTERNAL, and leaves the aFrame[] + ** array populated with a set of (frame -> page) mappings. Because the + ** WRITER, CHECKPOINT and READ0 locks are still held, it is safe to copy + ** data from the wal file into the database file according to the + ** contents of aFrame[]. + */ + if( p->rc==SQLITE_OK ){ + int rc2; + p->eStage = RBU_STAGE_CAPTURE; + rc2 = sqlite3_exec(p->dbMain, "PRAGMA main.wal_checkpoint=restart", 0, 0,0); + if( rc2!=SQLITE_INTERNAL ) p->rc = rc2; + } + + if( p->rc==SQLITE_OK ){ + p->eStage = RBU_STAGE_CKPT; + p->nStep = (pState ? pState->nRow : 0); + p->aBuf = rbuMalloc(p, p->pgsz); + p->iWalCksum = rbuShmChecksum(p); + } + + if( p->rc==SQLITE_OK && pState && pState->iWalCksum!=p->iWalCksum ){ + p->rc = SQLITE_DONE; + p->eStage = RBU_STAGE_DONE; + } } -/* -** This function is the implementation of both the xConnect and xCreate -** methods of the r-tree virtual table. -** -** argv[0] -> module name -** argv[1] -> database name -** argv[2] -> table name -** argv[...] -> column names... +/* +** Called when iAmt bytes are read from offset iOff of the wal file while +** the rbu object is in capture mode. Record the frame number of the frame +** being read in the aFrame[] array. */ -static int rtreeInit( - sqlite3 *db, /* Database connection */ - void *pAux, /* One of the RTREE_COORD_* constants */ - int argc, const char *const*argv, /* Parameters to CREATE TABLE statement */ - sqlite3_vtab **ppVtab, /* OUT: New virtual table */ - char **pzErr, /* OUT: Error message, if any */ - int isCreate /* True for xCreate, false for xConnect */ -){ - int rc = SQLITE_OK; - Rtree *pRtree; - int nDb; /* Length of string argv[1] */ - int nName; /* Length of string argv[2] */ - int eCoordType = (pAux ? RTREE_COORD_INT32 : RTREE_COORD_REAL32); +static int rbuCaptureWalRead(sqlite3rbu *pRbu, i64 iOff, int iAmt){ + const u32 mReq = (1<<WAL_LOCK_WRITE)|(1<<WAL_LOCK_CKPT)|(1<<WAL_LOCK_READ0); + u32 iFrame; - const char *aErrMsg[] = { - 0, /* 0 */ - "Wrong number of columns for an rtree table", /* 1 */ - "Too few columns for an rtree table", /* 2 */ - "Too many columns for an rtree table" /* 3 */ - }; + if( pRbu->mLock!=mReq ){ + pRbu->rc = SQLITE_BUSY; + return SQLITE_INTERNAL; + } - int iErr = (argc<6) ? 2 : argc>(RTREE_MAX_DIMENSIONS*2+4) ? 3 : argc%2; - if( aErrMsg[iErr] ){ - *pzErr = sqlite3_mprintf("%s", aErrMsg[iErr]); - return SQLITE_ERROR; + pRbu->pgsz = iAmt; + if( pRbu->nFrame==pRbu->nFrameAlloc ){ + int nNew = (pRbu->nFrameAlloc ? pRbu->nFrameAlloc : 64) * 2; + RbuFrame *aNew; + aNew = (RbuFrame*)sqlite3_realloc(pRbu->aFrame, nNew * sizeof(RbuFrame)); + if( aNew==0 ) return SQLITE_NOMEM; + pRbu->aFrame = aNew; + pRbu->nFrameAlloc = nNew; } - sqlite3_vtab_config(db, SQLITE_VTAB_CONSTRAINT_SUPPORT, 1); + iFrame = (u32)((iOff-32) / (i64)(iAmt+24)) + 1; + if( pRbu->iMaxFrame<iFrame ) pRbu->iMaxFrame = iFrame; + pRbu->aFrame[pRbu->nFrame].iWalFrame = iFrame; + pRbu->aFrame[pRbu->nFrame].iDbPage = 0; + pRbu->nFrame++; + return SQLITE_OK; +} - /* Allocate the sqlite3_vtab structure */ - nDb = (int)strlen(argv[1]); - nName = (int)strlen(argv[2]); - pRtree = (Rtree *)sqlite3_malloc(sizeof(Rtree)+nDb+nName+2); - if( !pRtree ){ - return SQLITE_NOMEM; +/* +** Called when a page of data is written to offset iOff of the database +** file while the rbu handle is in capture mode. Record the page number +** of the page being written in the aFrame[] array. +*/ +static int rbuCaptureDbWrite(sqlite3rbu *pRbu, i64 iOff){ + pRbu->aFrame[pRbu->nFrame-1].iDbPage = (u32)(iOff / pRbu->pgsz) + 1; + return SQLITE_OK; +} + +/* +** This is called as part of an incremental checkpoint operation. Copy +** a single frame of data from the wal file into the database file, as +** indicated by the RbuFrame object. +*/ +static void rbuCheckpointFrame(sqlite3rbu *p, RbuFrame *pFrame){ + sqlite3_file *pWal = p->pTargetFd->pWalFd->pReal; + sqlite3_file *pDb = p->pTargetFd->pReal; + i64 iOff; + + assert( p->rc==SQLITE_OK ); + iOff = (i64)(pFrame->iWalFrame-1) * (p->pgsz + 24) + 32 + 24; + p->rc = pWal->pMethods->xRead(pWal, p->aBuf, p->pgsz, iOff); + if( p->rc ) return; + + iOff = (i64)(pFrame->iDbPage-1) * p->pgsz; + p->rc = pDb->pMethods->xWrite(pDb, p->aBuf, p->pgsz, iOff); +} + + +/* +** Take an EXCLUSIVE lock on the database file. +*/ +static void rbuLockDatabase(sqlite3rbu *p){ + sqlite3_file *pReal = p->pTargetFd->pReal; + assert( p->rc==SQLITE_OK ); + p->rc = pReal->pMethods->xLock(pReal, SQLITE_LOCK_SHARED); + if( p->rc==SQLITE_OK ){ + p->rc = pReal->pMethods->xLock(pReal, SQLITE_LOCK_EXCLUSIVE); } - memset(pRtree, 0, sizeof(Rtree)+nDb+nName+2); - pRtree->nBusy = 1; - pRtree->base.pModule = &rtreeModule; - pRtree->zDb = (char *)&pRtree[1]; - pRtree->zName = &pRtree->zDb[nDb+1]; - pRtree->nDim = (argc-4)/2; - pRtree->nBytesPerCell = 8 + pRtree->nDim*4*2; - pRtree->eCoordType = eCoordType; - memcpy(pRtree->zDb, argv[1], nDb); - memcpy(pRtree->zName, argv[2], nName); +} - /* Figure out the node size to use. */ - rc = getNodeSize(db, pRtree, isCreate, pzErr); +/* +** The RBU handle is currently in RBU_STAGE_OAL state, with a SHARED lock +** on the database file. This proc moves the *-oal file to the *-wal path, +** then reopens the database file (this time in vanilla, non-oal, WAL mode). +** If an error occurs, leave an error code and error message in the rbu +** handle. +*/ +static void rbuMoveOalFile(sqlite3rbu *p){ + const char *zBase = sqlite3_db_filename(p->dbMain, "main"); - /* Create/Connect to the underlying relational database schema. If - ** that is successful, call sqlite3_declare_vtab() to configure - ** the r-tree table schema. - */ - if( rc==SQLITE_OK ){ - if( (rc = rtreeSqlInit(pRtree, db, argv[1], argv[2], isCreate)) ){ - *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); - }else{ - char *zSql = sqlite3_mprintf("CREATE TABLE x(%s", argv[3]); - char *zTmp; - int ii; - for(ii=4; zSql && ii<argc; ii++){ - zTmp = zSql; - zSql = sqlite3_mprintf("%s, %s", zTmp, argv[ii]); - sqlite3_free(zTmp); - } - if( zSql ){ - zTmp = zSql; - zSql = sqlite3_mprintf("%s);", zTmp); - sqlite3_free(zTmp); - } - if( !zSql ){ - rc = SQLITE_NOMEM; - }else if( SQLITE_OK!=(rc = sqlite3_declare_vtab(db, zSql)) ){ - *pzErr = sqlite3_mprintf("%s", sqlite3_errmsg(db)); + char *zWal = sqlite3_mprintf("%s-wal", zBase); + char *zOal = sqlite3_mprintf("%s-oal", zBase); + + assert( p->eStage==RBU_STAGE_MOVE ); + assert( p->rc==SQLITE_OK && p->zErrmsg==0 ); + if( zWal==0 || zOal==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + /* Move the *-oal file to *-wal. At this point connection p->db is + ** holding a SHARED lock on the target database file (because it is + ** in WAL mode). So no other connection may be writing the db. + ** + ** In order to ensure that there are no database readers, an EXCLUSIVE + ** lock is obtained here before the *-oal is moved to *-wal. + */ + rbuLockDatabase(p); + if( p->rc==SQLITE_OK ){ + rbuFileSuffix3(zBase, zWal); + rbuFileSuffix3(zBase, zOal); + + /* Re-open the databases. */ + rbuObjIterFinalize(&p->objiter); + sqlite3_close(p->dbMain); + sqlite3_close(p->dbRbu); + p->rc = rename(zOal, zWal) ? SQLITE_IOERR : SQLITE_OK; + if( p->rc==SQLITE_OK ){ + p->dbMain = 0; + p->dbRbu = 0; + rbuOpenDatabase(p); + rbuSetupCheckpoint(p, 0); } - sqlite3_free(zSql); } } - if( rc==SQLITE_OK ){ - *ppVtab = (sqlite3_vtab *)pRtree; - }else{ - assert( *ppVtab==0 ); - assert( pRtree->nBusy==1 ); - rtreeRelease(pRtree); - } - return rc; + sqlite3_free(zWal); + sqlite3_free(zOal); } - /* -** Implementation of a scalar function that decodes r-tree nodes to -** human readable strings. This can be used for debugging and analysis. +** The SELECT statement iterating through the keys for the current object +** (p->objiter.pSelect) currently points to a valid row. This function +** determines the type of operation requested by this row and returns +** one of the following values to indicate the result: ** -** The scalar function takes two arguments: (1) the number of dimensions -** to the rtree (between 1 and 5, inclusive) and (2) a blob of data containing -** an r-tree node. For a two-dimensional r-tree structure called "rt", to -** deserialize all nodes, a statement like: +** * RBU_INSERT +** * RBU_DELETE +** * RBU_IDX_DELETE +** * RBU_UPDATE ** -** SELECT rtreenode(2, data) FROM rt_node; +** If RBU_UPDATE is returned, then output variable *pzMask is set to +** point to the text value indicating the columns to update. ** -** The human readable string takes the form of a Tcl list with one -** entry for each cell in the r-tree node. Each entry is itself a -** list, containing the 8-byte rowid/pageno followed by the -** <num-dimension>*2 coordinates. +** If the rbu_control field contains an invalid value, an error code and +** message are left in the RBU handle and zero returned. */ -static void rtreenode(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ - char *zText = 0; - RtreeNode node; - Rtree tree; - int ii; +static int rbuStepType(sqlite3rbu *p, const char **pzMask){ + int iCol = p->objiter.nCol; /* Index of rbu_control column */ + int res = 0; /* Return value */ - UNUSED_PARAMETER(nArg); - memset(&node, 0, sizeof(RtreeNode)); - memset(&tree, 0, sizeof(Rtree)); - tree.nDim = sqlite3_value_int(apArg[0]); - tree.nBytesPerCell = 8 + 8 * tree.nDim; - node.zData = (u8 *)sqlite3_value_blob(apArg[1]); + switch( sqlite3_column_type(p->objiter.pSelect, iCol) ){ + case SQLITE_INTEGER: { + int iVal = sqlite3_column_int(p->objiter.pSelect, iCol); + if( iVal==0 ){ + res = RBU_INSERT; + }else if( iVal==1 ){ + res = RBU_DELETE; + }else if( iVal==2 ){ + res = RBU_IDX_DELETE; + }else if( iVal==3 ){ + res = RBU_IDX_INSERT; + } + break; + } - for(ii=0; ii<NCELL(&node); ii++){ - char zCell[512]; - int nCell = 0; - RtreeCell cell; - int jj; + case SQLITE_TEXT: { + const unsigned char *z = sqlite3_column_text(p->objiter.pSelect, iCol); + if( z==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + *pzMask = (const char*)z; + } + res = RBU_UPDATE; - nodeGetCell(&tree, &node, ii, &cell); - sqlite3_snprintf(512-nCell,&zCell[nCell],"%lld", cell.iRowid); - nCell = (int)strlen(zCell); - for(jj=0; jj<tree.nDim*2; jj++){ -#ifndef SQLITE_RTREE_INT_ONLY - sqlite3_snprintf(512-nCell,&zCell[nCell], " %g", - (double)cell.aCoord[jj].f); -#else - sqlite3_snprintf(512-nCell,&zCell[nCell], " %d", - cell.aCoord[jj].i); -#endif - nCell = (int)strlen(zCell); + break; } - if( zText ){ - char *zTextNew = sqlite3_mprintf("%s {%s}", zText, zCell); - sqlite3_free(zText); - zText = zTextNew; - }else{ - zText = sqlite3_mprintf("{%s}", zCell); - } + default: + break; } - - sqlite3_result_text(ctx, zText, -1, sqlite3_free); + + if( res==0 ){ + rbuBadControlError(p); + } + return res; } -/* This routine implements an SQL function that returns the "depth" parameter -** from the front of a blob that is an r-tree node. For example: +#ifdef SQLITE_DEBUG +/* +** Assert that column iCol of statement pStmt is named zName. +*/ +static void assertColumnName(sqlite3_stmt *pStmt, int iCol, const char *zName){ + const char *zCol = sqlite3_column_name(pStmt, iCol); + assert( 0==sqlite3_stricmp(zName, zCol) ); +} +#else +# define assertColumnName(x,y,z) +#endif + +/* +** This function does the work for an sqlite3rbu_step() call. ** -** SELECT rtreedepth(data) FROM rt_node WHERE nodeno=1; +** The object-iterator (p->objiter) currently points to a valid object, +** and the input cursor (p->objiter.pSelect) currently points to a valid +** input row. Perform whatever processing is required and return. ** -** The depth value is 0 for all nodes other than the root node, and the root -** node always has nodeno=1, so the example above is the primary use for this -** routine. This routine is intended for testing and analysis only. +** If no error occurs, SQLITE_OK is returned. Otherwise, an error code +** and message is left in the RBU handle and a copy of the error code +** returned. */ -static void rtreedepth(sqlite3_context *ctx, int nArg, sqlite3_value **apArg){ - UNUSED_PARAMETER(nArg); - if( sqlite3_value_type(apArg[0])!=SQLITE_BLOB - || sqlite3_value_bytes(apArg[0])<2 - ){ - sqlite3_result_error(ctx, "Invalid argument to rtreedepth()", -1); - }else{ - u8 *zBlob = (u8 *)sqlite3_value_blob(apArg[0]); - sqlite3_result_int(ctx, readInt16(zBlob)); +static int rbuStep(sqlite3rbu *p){ + RbuObjIter *pIter = &p->objiter; + const char *zMask = 0; + int i; + int eType = rbuStepType(p, &zMask); + + if( eType ){ + assert( eType!=RBU_UPDATE || pIter->zIdx==0 ); + + if( pIter->zIdx==0 && eType==RBU_IDX_DELETE ){ + rbuBadControlError(p); + } + else if( + eType==RBU_INSERT + || eType==RBU_DELETE + || eType==RBU_IDX_DELETE + || eType==RBU_IDX_INSERT + ){ + sqlite3_value *pVal; + sqlite3_stmt *pWriter; + + assert( eType!=RBU_UPDATE ); + assert( eType!=RBU_DELETE || pIter->zIdx==0 ); + + if( eType==RBU_IDX_DELETE || eType==RBU_DELETE ){ + pWriter = pIter->pDelete; + }else{ + pWriter = pIter->pInsert; + } + + for(i=0; i<pIter->nCol; i++){ + /* If this is an INSERT into a table b-tree and the table has an + ** explicit INTEGER PRIMARY KEY, check that this is not an attempt + ** to write a NULL into the IPK column. That is not permitted. */ + if( eType==RBU_INSERT + && pIter->zIdx==0 && pIter->eType==RBU_PK_IPK && pIter->abTblPk[i] + && sqlite3_column_type(pIter->pSelect, i)==SQLITE_NULL + ){ + p->rc = SQLITE_MISMATCH; + p->zErrmsg = sqlite3_mprintf("datatype mismatch"); + goto step_out; + } + + if( eType==RBU_DELETE && pIter->abTblPk[i]==0 ){ + continue; + } + + pVal = sqlite3_column_value(pIter->pSelect, i); + p->rc = sqlite3_bind_value(pWriter, i+1, pVal); + if( p->rc ) goto step_out; + } + if( pIter->zIdx==0 + && (pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE) + ){ + /* For a virtual table, or a table with no primary key, the + ** SELECT statement is: + ** + ** SELECT <cols>, rbu_control, rbu_rowid FROM .... + ** + ** Hence column_value(pIter->nCol+1). + */ + assertColumnName(pIter->pSelect, pIter->nCol+1, "rbu_rowid"); + pVal = sqlite3_column_value(pIter->pSelect, pIter->nCol+1); + p->rc = sqlite3_bind_value(pWriter, pIter->nCol+1, pVal); + } + if( p->rc==SQLITE_OK ){ + sqlite3_step(pWriter); + p->rc = resetAndCollectError(pWriter, &p->zErrmsg); + } + }else{ + sqlite3_value *pVal; + sqlite3_stmt *pUpdate = 0; + assert( eType==RBU_UPDATE ); + rbuGetUpdateStmt(p, pIter, zMask, &pUpdate); + if( pUpdate ){ + for(i=0; p->rc==SQLITE_OK && i<pIter->nCol; i++){ + char c = zMask[pIter->aiSrcOrder[i]]; + pVal = sqlite3_column_value(pIter->pSelect, i); + if( pIter->abTblPk[i] || c=='x' || c=='d' ){ + p->rc = sqlite3_bind_value(pUpdate, i+1, pVal); + } + } + if( p->rc==SQLITE_OK + && (pIter->eType==RBU_PK_VTAB || pIter->eType==RBU_PK_NONE) + ){ + /* Bind the rbu_rowid value to column _rowid_ */ + assertColumnName(pIter->pSelect, pIter->nCol+1, "rbu_rowid"); + pVal = sqlite3_column_value(pIter->pSelect, pIter->nCol+1); + p->rc = sqlite3_bind_value(pUpdate, pIter->nCol+1, pVal); + } + if( p->rc==SQLITE_OK ){ + sqlite3_step(pUpdate); + p->rc = resetAndCollectError(pUpdate, &p->zErrmsg); + } + } + } } + + step_out: + return p->rc; } /* -** Register the r-tree module with database handle db. This creates the -** virtual table module "rtree" and the debugging/analysis scalar -** function "rtreenode". +** Increment the schema cookie of the main database opened by p->dbMain. */ -SQLITE_PRIVATE int sqlite3RtreeInit(sqlite3 *db){ - const int utf8 = SQLITE_UTF8; - int rc; +static void rbuIncrSchemaCookie(sqlite3rbu *p){ + if( p->rc==SQLITE_OK ){ + int iCookie = 1000000; + sqlite3_stmt *pStmt; - rc = sqlite3_create_function(db, "rtreenode", 2, utf8, 0, rtreenode, 0, 0); - if( rc==SQLITE_OK ){ - rc = sqlite3_create_function(db, "rtreedepth", 1, utf8, 0,rtreedepth, 0, 0); - } - if( rc==SQLITE_OK ){ -#ifdef SQLITE_RTREE_INT_ONLY - void *c = (void *)RTREE_COORD_INT32; -#else - void *c = (void *)RTREE_COORD_REAL32; -#endif - rc = sqlite3_create_module_v2(db, "rtree", &rtreeModule, c, 0); - } - if( rc==SQLITE_OK ){ - void *c = (void *)RTREE_COORD_INT32; - rc = sqlite3_create_module_v2(db, "rtree_i32", &rtreeModule, c, 0); + p->rc = prepareAndCollectError(p->dbMain, &pStmt, &p->zErrmsg, + "PRAGMA schema_version" + ); + if( p->rc==SQLITE_OK ){ + /* Coverage: it may be that this sqlite3_step() cannot fail. There + ** is already a transaction open, so the prepared statement cannot + ** throw an SQLITE_SCHEMA exception. The only database page the + ** statement reads is page 1, which is guaranteed to be in the cache. + ** And no memory allocations are required. */ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + iCookie = sqlite3_column_int(pStmt, 0); + } + rbuFinalize(p, pStmt); + } + if( p->rc==SQLITE_OK ){ + rbuMPrintfExec(p, p->dbMain, "PRAGMA schema_version = %d", iCookie+1); + } } - - return rc; } /* -** This routine deletes the RtreeGeomCallback object that was attached -** one of the SQL functions create by sqlite3_rtree_geometry_callback() -** or sqlite3_rtree_query_callback(). In other words, this routine is the -** destructor for an RtreeGeomCallback objecct. This routine is called when -** the corresponding SQL function is deleted. +** Update the contents of the rbu_state table within the rbu database. The +** value stored in the RBU_STATE_STAGE column is eStage. All other values +** are determined by inspecting the rbu handle passed as the first argument. */ -static void rtreeFreeCallback(void *p){ - RtreeGeomCallback *pInfo = (RtreeGeomCallback*)p; - if( pInfo->xDestructor ) pInfo->xDestructor(pInfo->pContext); - sqlite3_free(p); +static void rbuSaveState(sqlite3rbu *p, int eStage){ + if( p->rc==SQLITE_OK || p->rc==SQLITE_DONE ){ + sqlite3_stmt *pInsert = 0; + int rc; + + assert( p->zErrmsg==0 ); + rc = prepareFreeAndCollectError(p->dbRbu, &pInsert, &p->zErrmsg, + sqlite3_mprintf( + "INSERT OR REPLACE INTO %s.rbu_state(k, v) VALUES " + "(%d, %d), " + "(%d, %Q), " + "(%d, %Q), " + "(%d, %d), " + "(%d, %d), " + "(%d, %lld), " + "(%d, %lld), " + "(%d, %lld) ", + p->zStateDb, + RBU_STATE_STAGE, eStage, + RBU_STATE_TBL, p->objiter.zTbl, + RBU_STATE_IDX, p->objiter.zIdx, + RBU_STATE_ROW, p->nStep, + RBU_STATE_PROGRESS, p->nProgress, + RBU_STATE_CKPT, p->iWalCksum, + RBU_STATE_COOKIE, (i64)p->pTargetFd->iCookie, + RBU_STATE_OALSZ, p->iOalSz + ) + ); + assert( pInsert==0 || rc==SQLITE_OK ); + + if( rc==SQLITE_OK ){ + sqlite3_step(pInsert); + rc = sqlite3_finalize(pInsert); + } + if( rc!=SQLITE_OK ) p->rc = rc; + } } + /* -** Each call to sqlite3_rtree_geometry_callback() or -** sqlite3_rtree_query_callback() creates an ordinary SQLite -** scalar function that is implemented by this routine. -** -** All this function does is construct an RtreeMatchArg object that -** contains the geometry-checking callback routines and a list of -** parameters to this function, then return that RtreeMatchArg object -** as a BLOB. -** -** The R-Tree MATCH operator will read the returned BLOB, deserialize -** the RtreeMatchArg object, and use the RtreeMatchArg object to figure -** out which elements of the R-Tree should be returned by the query. +** Step the RBU object. */ -static void geomCallback(sqlite3_context *ctx, int nArg, sqlite3_value **aArg){ - RtreeGeomCallback *pGeomCtx = (RtreeGeomCallback *)sqlite3_user_data(ctx); - RtreeMatchArg *pBlob; - int nBlob; +SQLITE_API int SQLITE_STDCALL sqlite3rbu_step(sqlite3rbu *p){ + if( p ){ + switch( p->eStage ){ + case RBU_STAGE_OAL: { + RbuObjIter *pIter = &p->objiter; + while( p->rc==SQLITE_OK && pIter->zTbl ){ + + if( pIter->bCleanup ){ + /* Clean up the rbu_tmp_xxx table for the previous table. It + ** cannot be dropped as there are currently active SQL statements. + ** But the contents can be deleted. */ + if( pIter->abIndexed ){ + rbuMPrintfExec(p, p->dbRbu, + "DELETE FROM %s.'rbu_tmp_%q'", p->zStateDb, pIter->zTbl + ); + } + }else{ + rbuObjIterPrepareAll(p, pIter, 0); + + /* Advance to the next row to process. */ + if( p->rc==SQLITE_OK ){ + int rc = sqlite3_step(pIter->pSelect); + if( rc==SQLITE_ROW ){ + p->nProgress++; + p->nStep++; + return rbuStep(p); + } + p->rc = sqlite3_reset(pIter->pSelect); + p->nStep = 0; + } + } - nBlob = sizeof(RtreeMatchArg) + (nArg-1)*sizeof(RtreeDValue); - pBlob = (RtreeMatchArg *)sqlite3_malloc(nBlob); - if( !pBlob ){ - sqlite3_result_error_nomem(ctx); - }else{ - int i; - pBlob->magic = RTREE_GEOMETRY_MAGIC; - pBlob->cb = pGeomCtx[0]; - pBlob->nParam = nArg; - for(i=0; i<nArg; i++){ -#ifdef SQLITE_RTREE_INT_ONLY - pBlob->aParam[i] = sqlite3_value_int64(aArg[i]); -#else - pBlob->aParam[i] = sqlite3_value_double(aArg[i]); -#endif + rbuObjIterNext(p, pIter); + } + + if( p->rc==SQLITE_OK ){ + assert( pIter->zTbl==0 ); + rbuSaveState(p, RBU_STAGE_MOVE); + rbuIncrSchemaCookie(p); + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbMain, "COMMIT", 0, 0, &p->zErrmsg); + } + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbRbu, "COMMIT", 0, 0, &p->zErrmsg); + } + p->eStage = RBU_STAGE_MOVE; + } + break; + } + + case RBU_STAGE_MOVE: { + if( p->rc==SQLITE_OK ){ + rbuMoveOalFile(p); + p->nProgress++; + } + break; + } + + case RBU_STAGE_CKPT: { + if( p->rc==SQLITE_OK ){ + if( p->nStep>=p->nFrame ){ + sqlite3_file *pDb = p->pTargetFd->pReal; + + /* Sync the db file */ + p->rc = pDb->pMethods->xSync(pDb, SQLITE_SYNC_NORMAL); + + /* Update nBackfill */ + if( p->rc==SQLITE_OK ){ + void volatile *ptr; + p->rc = pDb->pMethods->xShmMap(pDb, 0, 32*1024, 0, &ptr); + if( p->rc==SQLITE_OK ){ + ((u32 volatile*)ptr)[24] = p->iMaxFrame; + } + } + + if( p->rc==SQLITE_OK ){ + p->eStage = RBU_STAGE_DONE; + p->rc = SQLITE_DONE; + } + }else{ + RbuFrame *pFrame = &p->aFrame[p->nStep]; + rbuCheckpointFrame(p, pFrame); + p->nStep++; + } + p->nProgress++; + } + break; + } + + default: + break; } - sqlite3_result_blob(ctx, pBlob, nBlob, sqlite3_free); + return p->rc; + }else{ + return SQLITE_NOMEM; } } /* -** Register a new geometry function for use with the r-tree MATCH operator. +** Free an RbuState object allocated by rbuLoadState(). */ -SQLITE_API int SQLITE_STDCALL sqlite3_rtree_geometry_callback( - sqlite3 *db, /* Register SQL function on this connection */ - const char *zGeom, /* Name of the new SQL function */ - int (*xGeom)(sqlite3_rtree_geometry*,int,RtreeDValue*,int*), /* Callback */ - void *pContext /* Extra data associated with the callback */ -){ - RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ - - /* Allocate and populate the context object. */ - pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); - if( !pGeomCtx ) return SQLITE_NOMEM; - pGeomCtx->xGeom = xGeom; - pGeomCtx->xQueryFunc = 0; - pGeomCtx->xDestructor = 0; - pGeomCtx->pContext = pContext; - return sqlite3_create_function_v2(db, zGeom, -1, SQLITE_ANY, - (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback - ); +static void rbuFreeState(RbuState *p){ + if( p ){ + sqlite3_free(p->zTbl); + sqlite3_free(p->zIdx); + sqlite3_free(p); + } } /* -** Register a new 2nd-generation geometry function for use with the -** r-tree MATCH operator. +** Allocate an RbuState object and load the contents of the rbu_state +** table into it. Return a pointer to the new object. It is the +** responsibility of the caller to eventually free the object using +** sqlite3_free(). +** +** If an error occurs, leave an error code and message in the rbu handle +** and return NULL. */ -SQLITE_API int SQLITE_STDCALL sqlite3_rtree_query_callback( - sqlite3 *db, /* Register SQL function on this connection */ - const char *zQueryFunc, /* Name of new SQL function */ - int (*xQueryFunc)(sqlite3_rtree_query_info*), /* Callback */ - void *pContext, /* Extra data passed into the callback */ - void (*xDestructor)(void*) /* Destructor for the extra data */ -){ - RtreeGeomCallback *pGeomCtx; /* Context object for new user-function */ +static RbuState *rbuLoadState(sqlite3rbu *p){ + RbuState *pRet = 0; + sqlite3_stmt *pStmt = 0; + int rc; + int rc2; - /* Allocate and populate the context object. */ - pGeomCtx = (RtreeGeomCallback *)sqlite3_malloc(sizeof(RtreeGeomCallback)); - if( !pGeomCtx ) return SQLITE_NOMEM; - pGeomCtx->xGeom = 0; - pGeomCtx->xQueryFunc = xQueryFunc; - pGeomCtx->xDestructor = xDestructor; - pGeomCtx->pContext = pContext; - return sqlite3_create_function_v2(db, zQueryFunc, -1, SQLITE_ANY, - (void *)pGeomCtx, geomCallback, 0, 0, rtreeFreeCallback + pRet = (RbuState*)rbuMalloc(p, sizeof(RbuState)); + if( pRet==0 ) return 0; + + rc = prepareFreeAndCollectError(p->dbRbu, &pStmt, &p->zErrmsg, + sqlite3_mprintf("SELECT k, v FROM %s.rbu_state", p->zStateDb) ); -} + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + switch( sqlite3_column_int(pStmt, 0) ){ + case RBU_STATE_STAGE: + pRet->eStage = sqlite3_column_int(pStmt, 1); + if( pRet->eStage!=RBU_STAGE_OAL + && pRet->eStage!=RBU_STAGE_MOVE + && pRet->eStage!=RBU_STAGE_CKPT + ){ + p->rc = SQLITE_CORRUPT; + } + break; -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -SQLITE_API int SQLITE_STDCALL sqlite3_rtree_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3RtreeInit(db); + case RBU_STATE_TBL: + pRet->zTbl = rbuStrndup((char*)sqlite3_column_text(pStmt, 1), &rc); + break; + + case RBU_STATE_IDX: + pRet->zIdx = rbuStrndup((char*)sqlite3_column_text(pStmt, 1), &rc); + break; + + case RBU_STATE_ROW: + pRet->nRow = sqlite3_column_int(pStmt, 1); + break; + + case RBU_STATE_PROGRESS: + pRet->nProgress = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_CKPT: + pRet->iWalCksum = sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_COOKIE: + pRet->iCookie = (u32)sqlite3_column_int64(pStmt, 1); + break; + + case RBU_STATE_OALSZ: + pRet->iOalSz = (u32)sqlite3_column_int64(pStmt, 1); + break; + + default: + rc = SQLITE_CORRUPT; + break; + } + } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; + + p->rc = rc; + return pRet; } -#endif -#endif +/* +** Compare strings z1 and z2, returning 0 if they are identical, or non-zero +** otherwise. Either or both argument may be NULL. Two NULL values are +** considered equal, and NULL is considered distinct from all other values. +*/ +static int rbuStrCompare(const char *z1, const char *z2){ + if( z1==0 && z2==0 ) return 0; + if( z1==0 || z2==0 ) return 1; + return (sqlite3_stricmp(z1, z2)!=0); +} -/************** End of rtree.c ***********************************************/ -/************** Begin file icu.c *********************************************/ /* -** 2007 May 6 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** $Id: icu.c,v 1.7 2007/12/13 21:54:11 drh Exp $ -** -** This file implements an integration between the ICU library -** ("International Components for Unicode", an open-source library -** for handling unicode data) and SQLite. The integration uses -** ICU to provide the following to SQLite: +** This function is called as part of sqlite3rbu_open() when initializing +** an rbu handle in OAL stage. If the rbu update has not started (i.e. +** the rbu_state table was empty) it is a no-op. Otherwise, it arranges +** things so that the next call to sqlite3rbu_step() continues on from +** where the previous rbu handle left off. ** -** * An implementation of the SQL regexp() function (and hence REGEXP -** operator) using the ICU uregex_XX() APIs. -** -** * Implementations of the SQL scalar upper() and lower() functions -** for case mapping. -** -** * Integration of ICU and SQLite collation sequences. -** -** * An implementation of the LIKE operator that uses ICU to -** provide case-independent matching. +** If an error occurs, an error code and error message are left in the +** rbu handle passed as the first argument. */ +static void rbuSetupOal(sqlite3rbu *p, RbuState *pState){ + assert( p->rc==SQLITE_OK ); + if( pState->zTbl ){ + RbuObjIter *pIter = &p->objiter; + int rc = SQLITE_OK; -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_ICU) + while( rc==SQLITE_OK && pIter->zTbl && (pIter->bCleanup + || rbuStrCompare(pIter->zIdx, pState->zIdx) + || rbuStrCompare(pIter->zTbl, pState->zTbl) + )){ + rc = rbuObjIterNext(p, pIter); + } -/* Include ICU headers */ -#include <unicode/utypes.h> -#include <unicode/uregex.h> -#include <unicode/ustring.h> -#include <unicode/ucol.h> + if( rc==SQLITE_OK && !pIter->zTbl ){ + rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("rbu_state mismatch error"); + } -/* #include <assert.h> */ + if( rc==SQLITE_OK ){ + p->nStep = pState->nRow; + rc = rbuObjIterPrepareAll(p, &p->objiter, p->nStep); + } -#ifndef SQLITE_CORE - SQLITE_EXTENSION_INIT1 -#else -#endif + p->rc = rc; + } +} /* -** Maximum length (in bytes) of the pattern in a LIKE or GLOB -** operator. +** If there is a "*-oal" file in the file-system corresponding to the +** target database in the file-system, delete it. If an error occurs, +** leave an error code and error message in the rbu handle. */ -#ifndef SQLITE_MAX_LIKE_PATTERN_LENGTH -# define SQLITE_MAX_LIKE_PATTERN_LENGTH 50000 -#endif +static void rbuDeleteOalFile(sqlite3rbu *p){ + char *zOal = sqlite3_mprintf("%s-oal", p->zTarget); + assert( p->rc==SQLITE_OK && p->zErrmsg==0 ); + unlink(zOal); + sqlite3_free(zOal); +} /* -** Version of sqlite3_free() that is always a function, never a macro. +** Allocate a private rbu VFS for the rbu handle passed as the only +** argument. This VFS will be used unless the call to sqlite3rbu_open() +** specified a URI with a vfs=? option in place of a target database +** file name. */ -static void xFree(void *p){ - sqlite3_free(p); +static void rbuCreateVfs(sqlite3rbu *p){ + int rnd; + char zRnd[64]; + + assert( p->rc==SQLITE_OK ); + sqlite3_randomness(sizeof(int), (void*)&rnd); + sqlite3_snprintf(sizeof(zRnd), zRnd, "rbu_vfs_%d", rnd); + p->rc = sqlite3rbu_create_vfs(zRnd, 0); + if( p->rc==SQLITE_OK ){ + sqlite3_vfs *pVfs = sqlite3_vfs_find(zRnd); + assert( pVfs ); + p->zVfsName = pVfs->zName; + } } /* -** Compare two UTF-8 strings for equality where the first string is -** a "LIKE" expression. Return true (1) if they are the same and -** false (0) if they are different. +** Destroy the private VFS created for the rbu handle passed as the only +** argument by an earlier call to rbuCreateVfs(). */ -static int icuLikeCompare( - const uint8_t *zPattern, /* LIKE pattern */ - const uint8_t *zString, /* The UTF-8 string to compare against */ - const UChar32 uEsc /* The escape character */ +static void rbuDeleteVfs(sqlite3rbu *p){ + if( p->zVfsName ){ + sqlite3rbu_destroy_vfs(p->zVfsName); + p->zVfsName = 0; + } +} + +/* +** Open and return a new RBU handle. +*/ +SQLITE_API sqlite3rbu *SQLITE_STDCALL sqlite3rbu_open( + const char *zTarget, + const char *zRbu, + const char *zState ){ - static const int MATCH_ONE = (UChar32)'_'; - static const int MATCH_ALL = (UChar32)'%'; + sqlite3rbu *p; + int nTarget = strlen(zTarget); + int nRbu = strlen(zRbu); + int nState = zState ? strlen(zState) : 0; - int iPattern = 0; /* Current byte index in zPattern */ - int iString = 0; /* Current byte index in zString */ + p = (sqlite3rbu*)sqlite3_malloc(sizeof(sqlite3rbu)+nTarget+1+nRbu+1+nState+1); + if( p ){ + RbuState *pState = 0; - int prevEscape = 0; /* True if the previous character was uEsc */ + /* Create the custom VFS. */ + memset(p, 0, sizeof(sqlite3rbu)); + rbuCreateVfs(p); - while( zPattern[iPattern]!=0 ){ + /* Open the target database */ + if( p->rc==SQLITE_OK ){ + p->zTarget = (char*)&p[1]; + memcpy(p->zTarget, zTarget, nTarget+1); + p->zRbu = &p->zTarget[nTarget+1]; + memcpy(p->zRbu, zRbu, nRbu+1); + if( zState ){ + p->zState = &p->zRbu[nRbu+1]; + memcpy(p->zState, zState, nState+1); + } + rbuOpenDatabase(p); + } - /* Read (and consume) the next character from the input pattern. */ - UChar32 uPattern; - U8_NEXT_UNSAFE(zPattern, iPattern, uPattern); - assert(uPattern!=0); + /* If it has not already been created, create the rbu_state table */ + rbuMPrintfExec(p, p->dbRbu, RBU_CREATE_STATE, p->zStateDb); - /* There are now 4 possibilities: - ** - ** 1. uPattern is an unescaped match-all character "%", - ** 2. uPattern is an unescaped match-one character "_", - ** 3. uPattern is an unescaped escape character, or - ** 4. uPattern is to be handled as an ordinary character - */ - if( !prevEscape && uPattern==MATCH_ALL ){ - /* Case 1. */ - uint8_t c; + if( p->rc==SQLITE_OK ){ + pState = rbuLoadState(p); + assert( pState || p->rc!=SQLITE_OK ); + if( p->rc==SQLITE_OK ){ - /* Skip any MATCH_ALL or MATCH_ONE characters that follow a - ** MATCH_ALL. For each MATCH_ONE, skip one character in the - ** test string. - */ - while( (c=zPattern[iPattern]) == MATCH_ALL || c == MATCH_ONE ){ - if( c==MATCH_ONE ){ - if( zString[iString]==0 ) return 0; - U8_FWD_1_UNSAFE(zString, iString); + if( pState->eStage==0 ){ + rbuDeleteOalFile(p); + p->eStage = RBU_STAGE_OAL; + }else{ + p->eStage = pState->eStage; } - iPattern++; + p->nProgress = pState->nProgress; + p->iOalSz = pState->iOalSz; } + } + assert( p->rc!=SQLITE_OK || p->eStage!=0 ); - if( zPattern[iPattern]==0 ) return 1; - - while( zString[iString] ){ - if( icuLikeCompare(&zPattern[iPattern], &zString[iString], uEsc) ){ - return 1; - } - U8_FWD_1_UNSAFE(zString, iString); + if( p->rc==SQLITE_OK && p->pTargetFd->pWalFd ){ + if( p->eStage==RBU_STAGE_OAL ){ + p->rc = SQLITE_ERROR; + p->zErrmsg = sqlite3_mprintf("cannot update wal mode database"); + }else if( p->eStage==RBU_STAGE_MOVE ){ + p->eStage = RBU_STAGE_CKPT; + p->nStep = 0; } - return 0; - - }else if( !prevEscape && uPattern==MATCH_ONE ){ - /* Case 2. */ - if( zString[iString]==0 ) return 0; - U8_FWD_1_UNSAFE(zString, iString); - - }else if( !prevEscape && uPattern==uEsc){ - /* Case 3. */ - prevEscape = 1; + } - }else{ - /* Case 4. */ - UChar32 uString; - U8_NEXT_UNSAFE(zString, iString, uString); - uString = u_foldCase(uString, U_FOLD_CASE_DEFAULT); - uPattern = u_foldCase(uPattern, U_FOLD_CASE_DEFAULT); - if( uString!=uPattern ){ - return 0; - } - prevEscape = 0; + if( p->rc==SQLITE_OK + && (p->eStage==RBU_STAGE_OAL || p->eStage==RBU_STAGE_MOVE) + && pState->eStage!=0 && p->pTargetFd->iCookie!=pState->iCookie + ){ + /* At this point (pTargetFd->iCookie) contains the value of the + ** change-counter cookie (the thing that gets incremented when a + ** transaction is committed in rollback mode) currently stored on + ** page 1 of the database file. */ + p->rc = SQLITE_BUSY; + p->zErrmsg = sqlite3_mprintf("database modified during rbu update"); } - } - return zString[iString]==0; -} + if( p->rc==SQLITE_OK ){ + if( p->eStage==RBU_STAGE_OAL ){ -/* -** Implementation of the like() SQL function. This function implements -** the build-in LIKE operator. The first argument to the function is the -** pattern and the second argument is the string. So, the SQL statements: -** -** A LIKE B -** -** is implemented as like(B, A). If there is an escape character E, -** -** A LIKE B ESCAPE E -** -** is mapped to like(B, A, E). -*/ -static void icuLikeFunc( - sqlite3_context *context, - int argc, - sqlite3_value **argv -){ - const unsigned char *zA = sqlite3_value_text(argv[0]); - const unsigned char *zB = sqlite3_value_text(argv[1]); - UChar32 uEsc = 0; + /* Open transactions both databases. The *-oal file is opened or + ** created at this point. */ + p->rc = sqlite3_exec(p->dbMain, "BEGIN IMMEDIATE", 0, 0, &p->zErrmsg); + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->dbRbu, "BEGIN IMMEDIATE", 0, 0, &p->zErrmsg); + } + + /* Point the object iterator at the first object */ + if( p->rc==SQLITE_OK ){ + p->rc = rbuObjIterFirst(p, &p->objiter); + } - /* Limit the length of the LIKE or GLOB pattern to avoid problems - ** of deep recursion and N*N behavior in patternCompare(). - */ - if( sqlite3_value_bytes(argv[0])>SQLITE_MAX_LIKE_PATTERN_LENGTH ){ - sqlite3_result_error(context, "LIKE or GLOB pattern too complex", -1); - return; - } + /* If the RBU database contains no data_xxx tables, declare the RBU + ** update finished. */ + if( p->rc==SQLITE_OK && p->objiter.zTbl==0 ){ + p->rc = SQLITE_DONE; + } + if( p->rc==SQLITE_OK ){ + rbuSetupOal(p, pState); + } - if( argc==3 ){ - /* The escape character string must consist of a single UTF-8 character. - ** Otherwise, return an error. - */ - int nE= sqlite3_value_bytes(argv[2]); - const unsigned char *zE = sqlite3_value_text(argv[2]); - int i = 0; - if( zE==0 ) return; - U8_NEXT(zE, i, nE, uEsc); - if( i!=nE){ - sqlite3_result_error(context, - "ESCAPE expression must be a single character", -1); - return; + }else if( p->eStage==RBU_STAGE_MOVE ){ + /* no-op */ + }else if( p->eStage==RBU_STAGE_CKPT ){ + rbuSetupCheckpoint(p, pState); + }else if( p->eStage==RBU_STAGE_DONE ){ + p->rc = SQLITE_DONE; + }else{ + p->rc = SQLITE_CORRUPT; + } } - } - if( zA && zB ){ - sqlite3_result_int(context, icuLikeCompare(zA, zB, uEsc)); + rbuFreeState(pState); } + + return p; } + /* -** This function is called when an ICU function called from within -** the implementation of an SQL scalar function returns an error. -** -** The scalar function context passed as the first argument is -** loaded with an error message based on the following two args. +** Return the database handle used by pRbu. */ -static void icuFunctionError( - sqlite3_context *pCtx, /* SQLite scalar function context */ - const char *zName, /* Name of ICU function that failed */ - UErrorCode e /* Error code returned by ICU function */ -){ - char zBuf[128]; - sqlite3_snprintf(128, zBuf, "ICU error: %s(): %s", zName, u_errorName(e)); - zBuf[127] = '\0'; - sqlite3_result_error(pCtx, zBuf, -1); +SQLITE_API sqlite3 *SQLITE_STDCALL sqlite3rbu_db(sqlite3rbu *pRbu, int bRbu){ + sqlite3 *db = 0; + if( pRbu ){ + db = (bRbu ? pRbu->dbRbu : pRbu->dbMain); + } + return db; } + /* -** Function to delete compiled regexp objects. Registered as -** a destructor function with sqlite3_set_auxdata(). +** If the error code currently stored in the RBU handle is SQLITE_CONSTRAINT, +** then edit any error message string so as to remove all occurrences of +** the pattern "rbu_imp_[0-9]*". */ -static void icuRegexpDelete(void *p){ - URegularExpression *pExpr = (URegularExpression *)p; - uregex_close(pExpr); +static void rbuEditErrmsg(sqlite3rbu *p){ + if( p->rc==SQLITE_CONSTRAINT && p->zErrmsg ){ + int i; + int nErrmsg = strlen(p->zErrmsg); + for(i=0; i<(nErrmsg-8); i++){ + if( memcmp(&p->zErrmsg[i], "rbu_imp_", 8)==0 ){ + int nDel = 8; + while( p->zErrmsg[i+nDel]>='0' && p->zErrmsg[i+nDel]<='9' ) nDel++; + memmove(&p->zErrmsg[i], &p->zErrmsg[i+nDel], nErrmsg + 1 - i - nDel); + nErrmsg -= nDel; + } + } + } } /* -** Implementation of SQLite REGEXP operator. This scalar function takes -** two arguments. The first is a regular expression pattern to compile -** the second is a string to match against that pattern. If either -** argument is an SQL NULL, then NULL Is returned. Otherwise, the result -** is 1 if the string matches the pattern, or 0 otherwise. -** -** SQLite maps the regexp() function to the regexp() operator such -** that the following two are equivalent: -** -** zString REGEXP zPattern -** regexp(zPattern, zString) -** -** Uses the following ICU regexp APIs: -** -** uregex_open() -** uregex_matches() -** uregex_close() +** Close the RBU handle. */ -static void icuRegexpFunc(sqlite3_context *p, int nArg, sqlite3_value **apArg){ - UErrorCode status = U_ZERO_ERROR; - URegularExpression *pExpr; - UBool res; - const UChar *zString = sqlite3_value_text16(apArg[1]); +SQLITE_API int SQLITE_STDCALL sqlite3rbu_close(sqlite3rbu *p, char **pzErrmsg){ + int rc; + if( p ){ - (void)nArg; /* Unused parameter */ + /* Commit the transaction to the *-oal file. */ + if( p->rc==SQLITE_OK && p->eStage==RBU_STAGE_OAL ){ + p->rc = sqlite3_exec(p->dbMain, "COMMIT", 0, 0, &p->zErrmsg); + } - /* If the left hand side of the regexp operator is NULL, - ** then the result is also NULL. - */ - if( !zString ){ - return; - } + rbuSaveState(p, p->eStage); - pExpr = sqlite3_get_auxdata(p, 0); - if( !pExpr ){ - const UChar *zPattern = sqlite3_value_text16(apArg[0]); - if( !zPattern ){ - return; + if( p->rc==SQLITE_OK && p->eStage==RBU_STAGE_OAL ){ + p->rc = sqlite3_exec(p->dbRbu, "COMMIT", 0, 0, &p->zErrmsg); } - pExpr = uregex_open(zPattern, -1, 0, 0, &status); - if( U_SUCCESS(status) ){ - sqlite3_set_auxdata(p, 0, pExpr, icuRegexpDelete); - }else{ - assert(!pExpr); - icuFunctionError(p, "uregex_open", status); - return; - } - } + /* Close any open statement handles. */ + rbuObjIterFinalize(&p->objiter); - /* Configure the text that the regular expression operates on. */ - uregex_setText(pExpr, zString, -1, &status); - if( !U_SUCCESS(status) ){ - icuFunctionError(p, "uregex_setText", status); - return; - } + /* Close the open database handle and VFS object. */ + sqlite3_close(p->dbMain); + sqlite3_close(p->dbRbu); + rbuDeleteVfs(p); + sqlite3_free(p->aBuf); + sqlite3_free(p->aFrame); - /* Attempt the match */ - res = uregex_matches(pExpr, 0, &status); - if( !U_SUCCESS(status) ){ - icuFunctionError(p, "uregex_matches", status); - return; + rbuEditErrmsg(p); + rc = p->rc; + *pzErrmsg = p->zErrmsg; + sqlite3_free(p); + }else{ + rc = SQLITE_NOMEM; + *pzErrmsg = 0; } - - /* Set the text that the regular expression operates on to a NULL - ** pointer. This is not really necessary, but it is tidier than - ** leaving the regular expression object configured with an invalid - ** pointer after this function returns. - */ - uregex_setText(pExpr, 0, 0, &status); - - /* Return 1 or 0. */ - sqlite3_result_int(p, res ? 1 : 0); + return rc; } /* -** Implementations of scalar functions for case mapping - upper() and -** lower(). Function upper() converts its input to upper-case (ABC). -** Function lower() converts to lower-case (abc). +** Return the total number of key-value operations (inserts, deletes or +** updates) that have been performed on the target database since the +** current RBU update was started. +*/ +SQLITE_API sqlite3_int64 SQLITE_STDCALL sqlite3rbu_progress(sqlite3rbu *pRbu){ + return pRbu->nProgress; +} + +/************************************************************************** +** Beginning of RBU VFS shim methods. The VFS shim modifies the behaviour +** of a standard VFS in the following ways: ** -** ICU provides two types of case mapping, "general" case mapping and -** "language specific". Refer to ICU documentation for the differences -** between the two. +** 1. Whenever the first page of a main database file is read or +** written, the value of the change-counter cookie is stored in +** rbu_file.iCookie. Similarly, the value of the "write-version" +** database header field is stored in rbu_file.iWriteVer. This ensures +** that the values are always trustworthy within an open transaction. ** -** To utilise "general" case mapping, the upper() or lower() scalar -** functions are invoked with one argument: +** 2. Whenever an SQLITE_OPEN_WAL file is opened, the (rbu_file.pWalFd) +** member variable of the associated database file descriptor is set +** to point to the new file. A mutex protected linked list of all main +** db fds opened using a particular RBU VFS is maintained at +** rbu_vfs.pMain to facilitate this. ** -** upper('ABC') -> 'abc' -** lower('abc') -> 'ABC' +** 3. Using a new file-control "SQLITE_FCNTL_RBU", a main db rbu_file +** object can be marked as the target database of an RBU update. This +** turns on the following extra special behaviour: ** -** To access ICU "language specific" case mapping, upper() or lower() -** should be invoked with two arguments. The second argument is the name -** of the locale to use. Passing an empty string ("") or SQL NULL value -** as the second argument is the same as invoking the 1 argument version -** of upper() or lower(). +** 3a. If xAccess() is called to check if there exists a *-wal file +** associated with an RBU target database currently in RBU_STAGE_OAL +** stage (preparing the *-oal file), the following special handling +** applies: ** -** lower('I', 'en_us') -> 'i' -** lower('I', 'tr_tr') -> 'ı' (small dotless i) +** * if the *-wal file does exist, return SQLITE_CANTOPEN. An RBU +** target database may not be in wal mode already. ** -** http://www.icu-project.org/userguide/posix.html#case_mappings +** * if the *-wal file does not exist, set the output parameter to +** non-zero (to tell SQLite that it does exist) anyway. +** +** Then, when xOpen() is called to open the *-wal file associated with +** the RBU target in RBU_STAGE_OAL stage, instead of opening the *-wal +** file, the rbu vfs opens the corresponding *-oal file instead. +** +** 3b. The *-shm pages returned by xShmMap() for a target db file in +** RBU_STAGE_OAL mode are actually stored in heap memory. This is to +** avoid creating a *-shm file on disk. Additionally, xShmLock() calls +** are no-ops on target database files in RBU_STAGE_OAL mode. This is +** because assert() statements in some VFS implementations fail if +** xShmLock() is called before xShmMap(). +** +** 3c. If an EXCLUSIVE lock is attempted on a target database file in any +** mode except RBU_STAGE_DONE (all work completed and checkpointed), it +** fails with an SQLITE_BUSY error. This is to stop RBU connections +** from automatically checkpointing a *-wal (or *-oal) file from within +** sqlite3_close(). +** +** 3d. In RBU_STAGE_CAPTURE mode, all xRead() calls on the wal file, and +** all xWrite() calls on the target database file perform no IO. +** Instead the frame and page numbers that would be read and written +** are recorded. Additionally, successful attempts to obtain exclusive +** xShmLock() WRITER, CHECKPOINTER and READ0 locks on the target +** database file are recorded. xShmLock() calls to unlock the same +** locks are no-ops (so that once obtained, these locks are never +** relinquished). Finally, calls to xSync() on the target database +** file fail with SQLITE_INTERNAL errors. */ -static void icuCaseFunc16(sqlite3_context *p, int nArg, sqlite3_value **apArg){ - const UChar *zInput; - UChar *zOutput; - int nInput; - int nOutput; - - UErrorCode status = U_ZERO_ERROR; - const char *zLocale = 0; - - assert(nArg==1 || nArg==2); - if( nArg==2 ){ - zLocale = (const char *)sqlite3_value_text(apArg[1]); - } - zInput = sqlite3_value_text16(apArg[0]); - if( !zInput ){ - return; +static void rbuUnlockShm(rbu_file *p){ + if( p->pRbu ){ + int (*xShmLock)(sqlite3_file*,int,int,int) = p->pReal->pMethods->xShmLock; + int i; + for(i=0; i<SQLITE_SHM_NLOCK;i++){ + if( (1<<i) & p->pRbu->mLock ){ + xShmLock(p->pReal, i, 1, SQLITE_SHM_UNLOCK|SQLITE_SHM_EXCLUSIVE); + } + } + p->pRbu->mLock = 0; } - nInput = sqlite3_value_bytes16(apArg[0]); +} - nOutput = nInput * 2 + 2; - zOutput = sqlite3_malloc(nOutput); - if( !zOutput ){ - return; - } +/* +** Close an rbu file. +*/ +static int rbuVfsClose(sqlite3_file *pFile){ + rbu_file *p = (rbu_file*)pFile; + int rc; + int i; - if( sqlite3_user_data(p) ){ - u_strToUpper(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); - }else{ - u_strToLower(zOutput, nOutput/2, zInput, nInput/2, zLocale, &status); + /* Free the contents of the apShm[] array. And the array itself. */ + for(i=0; i<p->nShm; i++){ + sqlite3_free(p->apShm[i]); } + sqlite3_free(p->apShm); + p->apShm = 0; + sqlite3_free(p->zDel); - if( !U_SUCCESS(status) ){ - icuFunctionError(p, "u_strToLower()/u_strToUpper", status); - return; + if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ + rbu_file **pp; + sqlite3_mutex_enter(p->pRbuVfs->mutex); + for(pp=&p->pRbuVfs->pMain; *pp!=p; pp=&((*pp)->pMainNext)); + *pp = p->pMainNext; + sqlite3_mutex_leave(p->pRbuVfs->mutex); + rbuUnlockShm(p); + p->pReal->pMethods->xShmUnmap(p->pReal, 0); } - sqlite3_result_text16(p, zOutput, -1, xFree); + /* Close the underlying file handle */ + rc = p->pReal->pMethods->xClose(p->pReal); + return rc; } + /* -** Collation sequence destructor function. The pCtx argument points to -** a UCollator structure previously allocated using ucol_open(). +** Read and return an unsigned 32-bit big-endian integer from the buffer +** passed as the only argument. */ -static void icuCollationDel(void *pCtx){ - UCollator *p = (UCollator *)pCtx; - ucol_close(p); +static u32 rbuGetU32(u8 *aBuf){ + return ((u32)aBuf[0] << 24) + + ((u32)aBuf[1] << 16) + + ((u32)aBuf[2] << 8) + + ((u32)aBuf[3]); } /* -** Collation sequence comparison function. The pCtx argument points to -** a UCollator structure previously allocated using ucol_open(). +** Read data from an rbuVfs-file. */ -static int icuCollationColl( - void *pCtx, - int nLeft, - const void *zLeft, - int nRight, - const void *zRight +static int rbuVfsRead( + sqlite3_file *pFile, + void *zBuf, + int iAmt, + sqlite_int64 iOfst ){ - UCollationResult res; - UCollator *p = (UCollator *)pCtx; - res = ucol_strcoll(p, (UChar *)zLeft, nLeft/2, (UChar *)zRight, nRight/2); - switch( res ){ - case UCOL_LESS: return -1; - case UCOL_GREATER: return +1; - case UCOL_EQUAL: return 0; + rbu_file *p = (rbu_file*)pFile; + sqlite3rbu *pRbu = p->pRbu; + int rc; + + if( pRbu && pRbu->eStage==RBU_STAGE_CAPTURE ){ + assert( p->openFlags & SQLITE_OPEN_WAL ); + rc = rbuCaptureWalRead(p->pRbu, iOfst, iAmt); + }else{ + if( pRbu && pRbu->eStage==RBU_STAGE_OAL + && (p->openFlags & SQLITE_OPEN_WAL) + && iOfst>=pRbu->iOalSz + ){ + rc = SQLITE_OK; + memset(zBuf, 0, iAmt); + }else{ + rc = p->pReal->pMethods->xRead(p->pReal, zBuf, iAmt, iOfst); + } + if( rc==SQLITE_OK && iOfst==0 && (p->openFlags & SQLITE_OPEN_MAIN_DB) ){ + /* These look like magic numbers. But they are stable, as they are part + ** of the definition of the SQLite file format, which may not change. */ + u8 *pBuf = (u8*)zBuf; + p->iCookie = rbuGetU32(&pBuf[24]); + p->iWriteVer = pBuf[19]; + } } - assert(!"Unexpected return value from ucol_strcoll()"); - return 0; + return rc; } /* -** Implementation of the scalar function icu_load_collation(). -** -** This scalar function is used to add ICU collation based collation -** types to an SQLite database connection. It is intended to be called -** as follows: -** -** SELECT icu_load_collation(<locale>, <collation-name>); -** -** Where <locale> is a string containing an ICU locale identifier (i.e. -** "en_AU", "tr_TR" etc.) and <collation-name> is the name of the -** collation sequence to create. +** Write data to an rbuVfs-file. */ -static void icuLoadCollation( - sqlite3_context *p, - int nArg, - sqlite3_value **apArg +static int rbuVfsWrite( + sqlite3_file *pFile, + const void *zBuf, + int iAmt, + sqlite_int64 iOfst ){ - sqlite3 *db = (sqlite3 *)sqlite3_user_data(p); - UErrorCode status = U_ZERO_ERROR; - const char *zLocale; /* Locale identifier - (eg. "jp_JP") */ - const char *zName; /* SQL Collation sequence name (eg. "japanese") */ - UCollator *pUCollator; /* ICU library collation object */ - int rc; /* Return code from sqlite3_create_collation_x() */ - - assert(nArg==2); - zLocale = (const char *)sqlite3_value_text(apArg[0]); - zName = (const char *)sqlite3_value_text(apArg[1]); + rbu_file *p = (rbu_file*)pFile; + sqlite3rbu *pRbu = p->pRbu; + int rc; - if( !zLocale || !zName ){ - return; + if( pRbu && pRbu->eStage==RBU_STAGE_CAPTURE ){ + assert( p->openFlags & SQLITE_OPEN_MAIN_DB ); + rc = rbuCaptureDbWrite(p->pRbu, iOfst); + }else{ + if( pRbu && pRbu->eStage==RBU_STAGE_OAL + && (p->openFlags & SQLITE_OPEN_WAL) + && iOfst>=pRbu->iOalSz + ){ + pRbu->iOalSz = iAmt + iOfst; + } + rc = p->pReal->pMethods->xWrite(p->pReal, zBuf, iAmt, iOfst); + if( rc==SQLITE_OK && iOfst==0 && (p->openFlags & SQLITE_OPEN_MAIN_DB) ){ + /* These look like magic numbers. But they are stable, as they are part + ** of the definition of the SQLite file format, which may not change. */ + u8 *pBuf = (u8*)zBuf; + p->iCookie = rbuGetU32(&pBuf[24]); + p->iWriteVer = pBuf[19]; + } } + return rc; +} - pUCollator = ucol_open(zLocale, &status); - if( !U_SUCCESS(status) ){ - icuFunctionError(p, "ucol_open", status); - return; - } - assert(p); +/* +** Truncate an rbuVfs-file. +*/ +static int rbuVfsTruncate(sqlite3_file *pFile, sqlite_int64 size){ + rbu_file *p = (rbu_file*)pFile; + return p->pReal->pMethods->xTruncate(p->pReal, size); +} - rc = sqlite3_create_collation_v2(db, zName, SQLITE_UTF16, (void *)pUCollator, - icuCollationColl, icuCollationDel - ); - if( rc!=SQLITE_OK ){ - ucol_close(pUCollator); - sqlite3_result_error(p, "Error registering collation function", -1); +/* +** Sync an rbuVfs-file. +*/ +static int rbuVfsSync(sqlite3_file *pFile, int flags){ + rbu_file *p = (rbu_file *)pFile; + if( p->pRbu && p->pRbu->eStage==RBU_STAGE_CAPTURE ){ + if( p->openFlags & SQLITE_OPEN_MAIN_DB ){ + return SQLITE_INTERNAL; + } + return SQLITE_OK; } + return p->pReal->pMethods->xSync(p->pReal, flags); } /* -** Register the ICU extension functions with database db. +** Return the current file-size of an rbuVfs-file. */ -SQLITE_PRIVATE int sqlite3IcuInit(sqlite3 *db){ - struct IcuScalar { - const char *zName; /* Function name */ - int nArg; /* Number of arguments */ - int enc; /* Optimal text encoding */ - void *pContext; /* sqlite3_user_data() context */ - void (*xFunc)(sqlite3_context*,int,sqlite3_value**); - } scalars[] = { - {"regexp", 2, SQLITE_ANY, 0, icuRegexpFunc}, - - {"lower", 1, SQLITE_UTF16, 0, icuCaseFunc16}, - {"lower", 2, SQLITE_UTF16, 0, icuCaseFunc16}, - {"upper", 1, SQLITE_UTF16, (void*)1, icuCaseFunc16}, - {"upper", 2, SQLITE_UTF16, (void*)1, icuCaseFunc16}, - - {"lower", 1, SQLITE_UTF8, 0, icuCaseFunc16}, - {"lower", 2, SQLITE_UTF8, 0, icuCaseFunc16}, - {"upper", 1, SQLITE_UTF8, (void*)1, icuCaseFunc16}, - {"upper", 2, SQLITE_UTF8, (void*)1, icuCaseFunc16}, - - {"like", 2, SQLITE_UTF8, 0, icuLikeFunc}, - {"like", 3, SQLITE_UTF8, 0, icuLikeFunc}, - - {"icu_load_collation", 2, SQLITE_UTF8, (void*)db, icuLoadCollation}, - }; +static int rbuVfsFileSize(sqlite3_file *pFile, sqlite_int64 *pSize){ + rbu_file *p = (rbu_file *)pFile; + return p->pReal->pMethods->xFileSize(p->pReal, pSize); +} +/* +** Lock an rbuVfs-file. +*/ +static int rbuVfsLock(sqlite3_file *pFile, int eLock){ + rbu_file *p = (rbu_file*)pFile; + sqlite3rbu *pRbu = p->pRbu; int rc = SQLITE_OK; - int i; - for(i=0; rc==SQLITE_OK && i<(int)(sizeof(scalars)/sizeof(scalars[0])); i++){ - struct IcuScalar *p = &scalars[i]; - rc = sqlite3_create_function( - db, p->zName, p->nArg, p->enc, p->pContext, p->xFunc, 0, 0 - ); + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) ); + if( pRbu && eLock==SQLITE_LOCK_EXCLUSIVE && pRbu->eStage!=RBU_STAGE_DONE ){ + /* Do not allow EXCLUSIVE locks. Preventing SQLite from taking this + ** prevents it from checkpointing the database from sqlite3_close(). */ + rc = SQLITE_BUSY; + }else{ + rc = p->pReal->pMethods->xLock(p->pReal, eLock); } return rc; } -#if !SQLITE_CORE -#ifdef _WIN32 -__declspec(dllexport) -#endif -SQLITE_API int SQLITE_STDCALL sqlite3_icu_init( - sqlite3 *db, - char **pzErrMsg, - const sqlite3_api_routines *pApi -){ - SQLITE_EXTENSION_INIT2(pApi) - return sqlite3IcuInit(db); +/* +** Unlock an rbuVfs-file. +*/ +static int rbuVfsUnlock(sqlite3_file *pFile, int eLock){ + rbu_file *p = (rbu_file *)pFile; + return p->pReal->pMethods->xUnlock(p->pReal, eLock); } -#endif -#endif +/* +** Check if another file-handle holds a RESERVED lock on an rbuVfs-file. +*/ +static int rbuVfsCheckReservedLock(sqlite3_file *pFile, int *pResOut){ + rbu_file *p = (rbu_file *)pFile; + return p->pReal->pMethods->xCheckReservedLock(p->pReal, pResOut); +} -/************** End of icu.c *************************************************/ -/************** Begin file fts3_icu.c ****************************************/ /* -** 2007 June 22 -** -** The author disclaims copyright to this source code. In place of -** a legal notice, here is a blessing: -** -** May you do good and not evil. -** May you find forgiveness for yourself and forgive others. -** May you share freely, never taking more than you give. -** -************************************************************************* -** This file implements a tokenizer for fts3 based on the ICU library. +** File control method. For custom operations on an rbuVfs-file. */ -#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) -#ifdef SQLITE_ENABLE_ICU +static int rbuVfsFileControl(sqlite3_file *pFile, int op, void *pArg){ + rbu_file *p = (rbu_file *)pFile; + int (*xControl)(sqlite3_file*,int,void*) = p->pReal->pMethods->xFileControl; + int rc; -/* #include <assert.h> */ -/* #include <string.h> */ + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) + || p->openFlags & (SQLITE_OPEN_TRANSIENT_DB|SQLITE_OPEN_TEMP_JOURNAL) + ); + if( op==SQLITE_FCNTL_RBU ){ + sqlite3rbu *pRbu = (sqlite3rbu*)pArg; + + /* First try to find another RBU vfs lower down in the vfs stack. If + ** one is found, this vfs will operate in pass-through mode. The lower + ** level vfs will do the special RBU handling. */ + rc = xControl(p->pReal, op, pArg); + + if( rc==SQLITE_NOTFOUND ){ + /* Now search for a zipvfs instance lower down in the VFS stack. If + ** one is found, this is an error. */ + void *dummy = 0; + rc = xControl(p->pReal, SQLITE_FCNTL_ZIPVFS, &dummy); + if( rc==SQLITE_OK ){ + rc = SQLITE_ERROR; + pRbu->zErrmsg = sqlite3_mprintf("rbu/zipvfs setup error"); + }else if( rc==SQLITE_NOTFOUND ){ + pRbu->pTargetFd = p; + p->pRbu = pRbu; + if( p->pWalFd ) p->pWalFd->pRbu = pRbu; + rc = SQLITE_OK; + } + } + return rc; + } -#include <unicode/ubrk.h> -/* #include <unicode/ucol.h> */ -/* #include <unicode/ustring.h> */ -#include <unicode/utf16.h> + rc = xControl(p->pReal, op, pArg); + if( rc==SQLITE_OK && op==SQLITE_FCNTL_VFSNAME ){ + rbu_vfs *pRbuVfs = p->pRbuVfs; + char *zIn = *(char**)pArg; + char *zOut = sqlite3_mprintf("rbu(%s)/%z", pRbuVfs->base.zName, zIn); + *(char**)pArg = zOut; + if( zOut==0 ) rc = SQLITE_NOMEM; + } -typedef struct IcuTokenizer IcuTokenizer; -typedef struct IcuCursor IcuCursor; + return rc; +} -struct IcuTokenizer { - sqlite3_tokenizer base; - char *zLocale; -}; +/* +** Return the sector-size in bytes for an rbuVfs-file. +*/ +static int rbuVfsSectorSize(sqlite3_file *pFile){ + rbu_file *p = (rbu_file *)pFile; + return p->pReal->pMethods->xSectorSize(p->pReal); +} -struct IcuCursor { - sqlite3_tokenizer_cursor base; +/* +** Return the device characteristic flags supported by an rbuVfs-file. +*/ +static int rbuVfsDeviceCharacteristics(sqlite3_file *pFile){ + rbu_file *p = (rbu_file *)pFile; + return p->pReal->pMethods->xDeviceCharacteristics(p->pReal); +} - UBreakIterator *pIter; /* ICU break-iterator object */ - int nChar; /* Number of UChar elements in pInput */ - UChar *aChar; /* Copy of input using utf-16 encoding */ - int *aOffset; /* Offsets of each character in utf-8 input */ +/* +** Take or release a shared-memory lock. +*/ +static int rbuVfsShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ + rbu_file *p = (rbu_file*)pFile; + sqlite3rbu *pRbu = p->pRbu; + int rc = SQLITE_OK; - int nBuffer; - char *zBuffer; +#ifdef SQLITE_AMALGAMATION + assert( WAL_CKPT_LOCK==1 ); +#endif - int iToken; -}; + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) ); + if( pRbu && (pRbu->eStage==RBU_STAGE_OAL || pRbu->eStage==RBU_STAGE_MOVE) ){ + /* Magic number 1 is the WAL_CKPT_LOCK lock. Preventing SQLite from + ** taking this lock also prevents any checkpoints from occurring. + ** todo: really, it's not clear why this might occur, as + ** wal_autocheckpoint ought to be turned off. */ + if( ofst==WAL_LOCK_CKPT && n==1 ) rc = SQLITE_BUSY; + }else{ + int bCapture = 0; + if( n==1 && (flags & SQLITE_SHM_EXCLUSIVE) + && pRbu && pRbu->eStage==RBU_STAGE_CAPTURE + && (ofst==WAL_LOCK_WRITE || ofst==WAL_LOCK_CKPT || ofst==WAL_LOCK_READ0) + ){ + bCapture = 1; + } + + if( bCapture==0 || 0==(flags & SQLITE_SHM_UNLOCK) ){ + rc = p->pReal->pMethods->xShmLock(p->pReal, ofst, n, flags); + if( bCapture && rc==SQLITE_OK ){ + pRbu->mLock |= (1 << ofst); + } + } + } + + return rc; +} /* -** Create a new tokenizer instance. +** Obtain a pointer to a mapping of a single 32KiB page of the *-shm file. */ -static int icuCreate( - int argc, /* Number of entries in argv[] */ - const char * const *argv, /* Tokenizer creation arguments */ - sqlite3_tokenizer **ppTokenizer /* OUT: Created tokenizer */ +static int rbuVfsShmMap( + sqlite3_file *pFile, + int iRegion, + int szRegion, + int isWrite, + void volatile **pp ){ - IcuTokenizer *p; - int n = 0; + rbu_file *p = (rbu_file*)pFile; + int rc = SQLITE_OK; + int eStage = (p->pRbu ? p->pRbu->eStage : 0); + + /* If not in RBU_STAGE_OAL, allow this call to pass through. Or, if this + ** rbu is in the RBU_STAGE_OAL state, use heap memory for *-shm space + ** instead of a file on disk. */ + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) ); + if( eStage==RBU_STAGE_OAL || eStage==RBU_STAGE_MOVE ){ + if( iRegion<=p->nShm ){ + int nByte = (iRegion+1) * sizeof(char*); + char **apNew = (char**)sqlite3_realloc(p->apShm, nByte); + if( apNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(&apNew[p->nShm], 0, sizeof(char*) * (1 + iRegion - p->nShm)); + p->apShm = apNew; + p->nShm = iRegion+1; + } + } - if( argc>0 ){ - n = strlen(argv[0])+1; - } - p = (IcuTokenizer *)sqlite3_malloc(sizeof(IcuTokenizer)+n); - if( !p ){ - return SQLITE_NOMEM; - } - memset(p, 0, sizeof(IcuTokenizer)); + if( rc==SQLITE_OK && p->apShm[iRegion]==0 ){ + char *pNew = (char*)sqlite3_malloc(szRegion); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pNew, 0, szRegion); + p->apShm[iRegion] = pNew; + } + } - if( n ){ - p->zLocale = (char *)&p[1]; - memcpy(p->zLocale, argv[0], n); + if( rc==SQLITE_OK ){ + *pp = p->apShm[iRegion]; + }else{ + *pp = 0; + } + }else{ + assert( p->apShm==0 ); + rc = p->pReal->pMethods->xShmMap(p->pReal, iRegion, szRegion, isWrite, pp); } - *ppTokenizer = (sqlite3_tokenizer *)p; - - return SQLITE_OK; + return rc; } /* -** Destroy a tokenizer +** Memory barrier. */ -static int icuDestroy(sqlite3_tokenizer *pTokenizer){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - sqlite3_free(p); - return SQLITE_OK; +static void rbuVfsShmBarrier(sqlite3_file *pFile){ + rbu_file *p = (rbu_file *)pFile; + p->pReal->pMethods->xShmBarrier(p->pReal); } /* -** Prepare to begin tokenizing a particular string. The input -** string to be tokenized is pInput[0..nBytes-1]. A cursor -** used to incrementally tokenize this string is returned in -** *ppCursor. +** The xShmUnmap method. */ -static int icuOpen( - sqlite3_tokenizer *pTokenizer, /* The tokenizer */ - const char *zInput, /* Input string */ - int nInput, /* Length of zInput in bytes */ - sqlite3_tokenizer_cursor **ppCursor /* OUT: Tokenization cursor */ -){ - IcuTokenizer *p = (IcuTokenizer *)pTokenizer; - IcuCursor *pCsr; - - const int32_t opt = U_FOLD_CASE_DEFAULT; - UErrorCode status = U_ZERO_ERROR; - int nChar; - - UChar32 c; - int iInput = 0; - int iOut = 0; - - *ppCursor = 0; +static int rbuVfsShmUnmap(sqlite3_file *pFile, int delFlag){ + rbu_file *p = (rbu_file*)pFile; + int rc = SQLITE_OK; + int eStage = (p->pRbu ? p->pRbu->eStage : 0); - if( zInput==0 ){ - nInput = 0; - zInput = ""; - }else if( nInput<0 ){ - nInput = strlen(zInput); - } - nChar = nInput+1; - pCsr = (IcuCursor *)sqlite3_malloc( - sizeof(IcuCursor) + /* IcuCursor */ - ((nChar+3)&~3) * sizeof(UChar) + /* IcuCursor.aChar[] */ - (nChar+1) * sizeof(int) /* IcuCursor.aOffset[] */ - ); - if( !pCsr ){ - return SQLITE_NOMEM; + assert( p->openFlags & (SQLITE_OPEN_MAIN_DB|SQLITE_OPEN_TEMP_DB) ); + if( eStage==RBU_STAGE_OAL || eStage==RBU_STAGE_MOVE ){ + /* no-op */ + }else{ + /* Release the checkpointer and writer locks */ + rbuUnlockShm(p); + rc = p->pReal->pMethods->xShmUnmap(p->pReal, delFlag); } - memset(pCsr, 0, sizeof(IcuCursor)); - pCsr->aChar = (UChar *)&pCsr[1]; - pCsr->aOffset = (int *)&pCsr->aChar[(nChar+3)&~3]; + return rc; +} - pCsr->aOffset[iOut] = iInput; - U8_NEXT(zInput, iInput, nInput, c); - while( c>0 ){ - int isError = 0; - c = u_foldCase(c, opt); - U16_APPEND(pCsr->aChar, iOut, nChar, c, isError); - if( isError ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; - } - pCsr->aOffset[iOut] = iInput; +/* +** Given that zWal points to a buffer containing a wal file name passed to +** either the xOpen() or xAccess() VFS method, return a pointer to the +** file-handle opened by the same database connection on the corresponding +** database file. +*/ +static rbu_file *rbuFindMaindb(rbu_vfs *pRbuVfs, const char *zWal){ + rbu_file *pDb; + sqlite3_mutex_enter(pRbuVfs->mutex); + for(pDb=pRbuVfs->pMain; pDb && pDb->zWal!=zWal; pDb=pDb->pMainNext); + sqlite3_mutex_leave(pRbuVfs->mutex); + return pDb; +} - if( iInput<nInput ){ - U8_NEXT(zInput, iInput, nInput, c); - }else{ - c = 0; +/* +** Open an rbu file handle. +*/ +static int rbuVfsOpen( + sqlite3_vfs *pVfs, + const char *zName, + sqlite3_file *pFile, + int flags, + int *pOutFlags +){ + static sqlite3_io_methods rbuvfs_io_methods = { + 2, /* iVersion */ + rbuVfsClose, /* xClose */ + rbuVfsRead, /* xRead */ + rbuVfsWrite, /* xWrite */ + rbuVfsTruncate, /* xTruncate */ + rbuVfsSync, /* xSync */ + rbuVfsFileSize, /* xFileSize */ + rbuVfsLock, /* xLock */ + rbuVfsUnlock, /* xUnlock */ + rbuVfsCheckReservedLock, /* xCheckReservedLock */ + rbuVfsFileControl, /* xFileControl */ + rbuVfsSectorSize, /* xSectorSize */ + rbuVfsDeviceCharacteristics, /* xDeviceCharacteristics */ + rbuVfsShmMap, /* xShmMap */ + rbuVfsShmLock, /* xShmLock */ + rbuVfsShmBarrier, /* xShmBarrier */ + rbuVfsShmUnmap /* xShmUnmap */ + }; + rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs; + sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs; + rbu_file *pFd = (rbu_file *)pFile; + int rc = SQLITE_OK; + const char *zOpen = zName; + + memset(pFd, 0, sizeof(rbu_file)); + pFd->pReal = (sqlite3_file*)&pFd[1]; + pFd->pRbuVfs = pRbuVfs; + pFd->openFlags = flags; + if( zName ){ + if( flags & SQLITE_OPEN_MAIN_DB ){ + /* A main database has just been opened. The following block sets + ** (pFd->zWal) to point to a buffer owned by SQLite that contains + ** the name of the *-wal file this db connection will use. SQLite + ** happens to pass a pointer to this buffer when using xAccess() + ** or xOpen() to operate on the *-wal file. */ + int n = strlen(zName); + const char *z = &zName[n]; + if( flags & SQLITE_OPEN_URI ){ + int odd = 0; + while( 1 ){ + if( z[0]==0 ){ + odd = 1 - odd; + if( odd && z[1]==0 ) break; + } + z++; + } + z += 2; + }else{ + while( *z==0 ) z++; + } + z += (n + 8 + 1); + pFd->zWal = z; + } + else if( flags & SQLITE_OPEN_WAL ){ + rbu_file *pDb = rbuFindMaindb(pRbuVfs, zName); + if( pDb ){ + if( pDb->pRbu && pDb->pRbu->eStage==RBU_STAGE_OAL ){ + /* This call is to open a *-wal file. Intead, open the *-oal. This + ** code ensures that the string passed to xOpen() is terminated by a + ** pair of '\0' bytes in case the VFS attempts to extract a URI + ** parameter from it. */ + int nCopy = strlen(zName); + char *zCopy = sqlite3_malloc(nCopy+2); + if( zCopy ){ + memcpy(zCopy, zName, nCopy); + zCopy[nCopy-3] = 'o'; + zCopy[nCopy] = '\0'; + zCopy[nCopy+1] = '\0'; + zOpen = (const char*)(pFd->zDel = zCopy); + }else{ + rc = SQLITE_NOMEM; + } + pFd->pRbu = pDb->pRbu; + } + pDb->pWalFd = pFd; + } } } - pCsr->pIter = ubrk_open(UBRK_WORD, p->zLocale, pCsr->aChar, iOut, &status); - if( !U_SUCCESS(status) ){ - sqlite3_free(pCsr); - return SQLITE_ERROR; + if( rc==SQLITE_OK ){ + rc = pRealVfs->xOpen(pRealVfs, zOpen, pFd->pReal, flags, pOutFlags); + } + if( pFd->pReal->pMethods ){ + /* The xOpen() operation has succeeded. Set the sqlite3_file.pMethods + ** pointer and, if the file is a main database file, link it into the + ** mutex protected linked list of all such files. */ + pFile->pMethods = &rbuvfs_io_methods; + if( flags & SQLITE_OPEN_MAIN_DB ){ + sqlite3_mutex_enter(pRbuVfs->mutex); + pFd->pMainNext = pRbuVfs->pMain; + pRbuVfs->pMain = pFd; + sqlite3_mutex_leave(pRbuVfs->mutex); + } + }else{ + sqlite3_free(pFd->zDel); } - pCsr->nChar = iOut; - ubrk_first(pCsr->pIter); - *ppCursor = (sqlite3_tokenizer_cursor *)pCsr; - return SQLITE_OK; + return rc; } /* -** Close a tokenization cursor previously opened by a call to icuOpen(). +** Delete the file located at zPath. */ -static int icuClose(sqlite3_tokenizer_cursor *pCursor){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - ubrk_close(pCsr->pIter); - sqlite3_free(pCsr->zBuffer); - sqlite3_free(pCsr); - return SQLITE_OK; +static int rbuVfsDelete(sqlite3_vfs *pVfs, const char *zPath, int dirSync){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xDelete(pRealVfs, zPath, dirSync); } /* -** Extract the next token from a tokenization cursor. +** Test for access permissions. Return true if the requested permission +** is available, or false otherwise. */ -static int icuNext( - sqlite3_tokenizer_cursor *pCursor, /* Cursor returned by simpleOpen */ - const char **ppToken, /* OUT: *ppToken is the token text */ - int *pnBytes, /* OUT: Number of bytes in token */ - int *piStartOffset, /* OUT: Starting offset of token */ - int *piEndOffset, /* OUT: Ending offset of token */ - int *piPosition /* OUT: Position integer of token */ +static int rbuVfsAccess( + sqlite3_vfs *pVfs, + const char *zPath, + int flags, + int *pResOut ){ - IcuCursor *pCsr = (IcuCursor *)pCursor; - - int iStart = 0; - int iEnd = 0; - int nByte = 0; - - while( iStart==iEnd ){ - UChar32 c; + rbu_vfs *pRbuVfs = (rbu_vfs*)pVfs; + sqlite3_vfs *pRealVfs = pRbuVfs->pRealVfs; + int rc; - iStart = ubrk_current(pCsr->pIter); - iEnd = ubrk_next(pCsr->pIter); - if( iEnd==UBRK_DONE ){ - return SQLITE_DONE; - } + rc = pRealVfs->xAccess(pRealVfs, zPath, flags, pResOut); - while( iStart<iEnd ){ - int iWhite = iStart; - U16_NEXT(pCsr->aChar, iWhite, pCsr->nChar, c); - if( u_isspace(c) ){ - iStart = iWhite; + /* If this call is to check if a *-wal file associated with an RBU target + ** database connection exists, and the RBU update is in RBU_STAGE_OAL, + ** the following special handling is activated: + ** + ** a) if the *-wal file does exist, return SQLITE_CANTOPEN. This + ** ensures that the RBU extension never tries to update a database + ** in wal mode, even if the first page of the database file has + ** been damaged. + ** + ** b) if the *-wal file does not exist, claim that it does anyway, + ** causing SQLite to call xOpen() to open it. This call will also + ** be intercepted (see the rbuVfsOpen() function) and the *-oal + ** file opened instead. + */ + if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ + rbu_file *pDb = rbuFindMaindb(pRbuVfs, zPath); + if( pDb && pDb->pRbu && pDb->pRbu->eStage==RBU_STAGE_OAL ){ + if( *pResOut ){ + rc = SQLITE_CANTOPEN; }else{ - break; + *pResOut = 1; } } - assert(iStart<=iEnd); } - do { - UErrorCode status = U_ZERO_ERROR; - if( nByte ){ - char *zNew = sqlite3_realloc(pCsr->zBuffer, nByte); - if( !zNew ){ - return SQLITE_NOMEM; - } - pCsr->zBuffer = zNew; - pCsr->nBuffer = nByte; - } + return rc; +} - u_strToUTF8( - pCsr->zBuffer, pCsr->nBuffer, &nByte, /* Output vars */ - &pCsr->aChar[iStart], iEnd-iStart, /* Input vars */ - &status /* Output success/failure */ - ); - } while( nByte>pCsr->nBuffer ); +/* +** Populate buffer zOut with the full canonical pathname corresponding +** to the pathname in zPath. zOut is guaranteed to point to a buffer +** of at least (DEVSYM_MAX_PATHNAME+1) bytes. +*/ +static int rbuVfsFullPathname( + sqlite3_vfs *pVfs, + const char *zPath, + int nOut, + char *zOut +){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xFullPathname(pRealVfs, zPath, nOut, zOut); +} - *ppToken = pCsr->zBuffer; - *pnBytes = nByte; - *piStartOffset = pCsr->aOffset[iStart]; - *piEndOffset = pCsr->aOffset[iEnd]; - *piPosition = pCsr->iToken++; +#ifndef SQLITE_OMIT_LOAD_EXTENSION +/* +** Open the dynamic library located at zPath and return a handle. +*/ +static void *rbuVfsDlOpen(sqlite3_vfs *pVfs, const char *zPath){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xDlOpen(pRealVfs, zPath); +} - return SQLITE_OK; +/* +** Populate the buffer zErrMsg (size nByte bytes) with a human readable +** utf-8 string describing the most recent error encountered associated +** with dynamic libraries. +*/ +static void rbuVfsDlError(sqlite3_vfs *pVfs, int nByte, char *zErrMsg){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + pRealVfs->xDlError(pRealVfs, nByte, zErrMsg); } /* -** The set of routines that implement the simple tokenizer +** Return a pointer to the symbol zSymbol in the dynamic library pHandle. */ -static const sqlite3_tokenizer_module icuTokenizerModule = { - 0, /* iVersion */ - icuCreate, /* xCreate */ - icuDestroy, /* xCreate */ - icuOpen, /* xOpen */ - icuClose, /* xClose */ - icuNext, /* xNext */ -}; +static void (*rbuVfsDlSym( + sqlite3_vfs *pVfs, + void *pArg, + const char *zSym +))(void){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xDlSym(pRealVfs, pArg, zSym); +} /* -** Set *ppModule to point at the implementation of the ICU tokenizer. +** Close the dynamic library handle pHandle. */ -SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( - sqlite3_tokenizer_module const**ppModule -){ - *ppModule = &icuTokenizerModule; +static void rbuVfsDlClose(sqlite3_vfs *pVfs, void *pHandle){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + pRealVfs->xDlClose(pRealVfs, pHandle); } +#endif /* SQLITE_OMIT_LOAD_EXTENSION */ -#endif /* defined(SQLITE_ENABLE_ICU) */ -#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ +/* +** Populate the buffer pointed to by zBufOut with nByte bytes of +** random data. +*/ +static int rbuVfsRandomness(sqlite3_vfs *pVfs, int nByte, char *zBufOut){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xRandomness(pRealVfs, nByte, zBufOut); +} -/************** End of fts3_icu.c ********************************************/ +/* +** Sleep for nMicro microseconds. Return the number of microseconds +** actually slept. +*/ +static int rbuVfsSleep(sqlite3_vfs *pVfs, int nMicro){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xSleep(pRealVfs, nMicro); +} + +/* +** Return the current time as a Julian Day number in *pTimeOut. +*/ +static int rbuVfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){ + sqlite3_vfs *pRealVfs = ((rbu_vfs*)pVfs)->pRealVfs; + return pRealVfs->xCurrentTime(pRealVfs, pTimeOut); +} + +/* +** No-op. +*/ +static int rbuVfsGetLastError(sqlite3_vfs *pVfs, int a, char *b){ + return 0; +} + +/* +** Deregister and destroy an RBU vfs created by an earlier call to +** sqlite3rbu_create_vfs(). +*/ +SQLITE_API void SQLITE_STDCALL sqlite3rbu_destroy_vfs(const char *zName){ + sqlite3_vfs *pVfs = sqlite3_vfs_find(zName); + if( pVfs && pVfs->xOpen==rbuVfsOpen ){ + sqlite3_mutex_free(((rbu_vfs*)pVfs)->mutex); + sqlite3_vfs_unregister(pVfs); + sqlite3_free(pVfs); + } +} + +/* +** Create an RBU VFS named zName that accesses the underlying file-system +** via existing VFS zParent. The new object is registered as a non-default +** VFS with SQLite before returning. +*/ +SQLITE_API int SQLITE_STDCALL sqlite3rbu_create_vfs(const char *zName, const char *zParent){ + + /* Template for VFS */ + static sqlite3_vfs vfs_template = { + 1, /* iVersion */ + 0, /* szOsFile */ + 0, /* mxPathname */ + 0, /* pNext */ + 0, /* zName */ + 0, /* pAppData */ + rbuVfsOpen, /* xOpen */ + rbuVfsDelete, /* xDelete */ + rbuVfsAccess, /* xAccess */ + rbuVfsFullPathname, /* xFullPathname */ + +#ifndef SQLITE_OMIT_LOAD_EXTENSION + rbuVfsDlOpen, /* xDlOpen */ + rbuVfsDlError, /* xDlError */ + rbuVfsDlSym, /* xDlSym */ + rbuVfsDlClose, /* xDlClose */ +#else + 0, 0, 0, 0, +#endif + + rbuVfsRandomness, /* xRandomness */ + rbuVfsSleep, /* xSleep */ + rbuVfsCurrentTime, /* xCurrentTime */ + rbuVfsGetLastError, /* xGetLastError */ + 0, /* xCurrentTimeInt64 (version 2) */ + 0, 0, 0 /* Unimplemented version 3 methods */ + }; + + rbu_vfs *pNew = 0; /* Newly allocated VFS */ + int nName; + int rc = SQLITE_OK; + + int nByte; + nName = strlen(zName); + nByte = sizeof(rbu_vfs) + nName + 1; + pNew = (rbu_vfs*)sqlite3_malloc(nByte); + if( pNew==0 ){ + rc = SQLITE_NOMEM; + }else{ + sqlite3_vfs *pParent; /* Parent VFS */ + memset(pNew, 0, nByte); + pParent = sqlite3_vfs_find(zParent); + if( pParent==0 ){ + rc = SQLITE_NOTFOUND; + }else{ + char *zSpace; + memcpy(&pNew->base, &vfs_template, sizeof(sqlite3_vfs)); + pNew->base.mxPathname = pParent->mxPathname; + pNew->base.szOsFile = sizeof(rbu_file) + pParent->szOsFile; + pNew->pRealVfs = pParent; + pNew->base.zName = (const char*)(zSpace = (char*)&pNew[1]); + memcpy(zSpace, zName, nName); + + /* Allocate the mutex and register the new VFS (not as the default) */ + pNew->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); + if( pNew->mutex==0 ){ + rc = SQLITE_NOMEM; + }else{ + rc = sqlite3_vfs_register(&pNew->base, 0); + } + } + + if( rc!=SQLITE_OK ){ + sqlite3_mutex_free(pNew->mutex); + sqlite3_free(pNew); + } + } + + return rc; +} + + +/**************************************************************************/ + +#endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_RBU) */ + +/************** End of sqlite3rbu.c ******************************************/ /************** Begin file dbstat.c ******************************************/ /* ** 2010 July 12 @@ -155233,6 +160988,7 @@ SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule( ** for an example implementation. */ +/* #include "sqliteInt.h" ** Requires access to internal data structures ** */ #if (defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST)) \ && !defined(SQLITE_OMIT_VIRTUALTABLE) @@ -155835,7 +161591,7 @@ static int statRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){ /* ** Invoke this routine to register the "dbstat" virtual table module */ -SQLITE_API int SQLITE_STDCALL sqlite3_dbstat_register(sqlite3 *db){ +SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3 *db){ static sqlite3_module dbstat_module = { 0, /* iVersion */ statConnect, /* xCreate */ @@ -155860,6 +161616,8 @@ SQLITE_API int SQLITE_STDCALL sqlite3_dbstat_register(sqlite3 *db){ }; return sqlite3_create_module(db, "dbstat", &dbstat_module, 0); } +#elif defined(SQLITE_ENABLE_DBSTAT_VTAB) +SQLITE_PRIVATE int sqlite3DbstatRegister(sqlite3 *db){ return SQLITE_OK; } #endif /* SQLITE_ENABLE_DBSTAT_VTAB */ /************** End of dbstat.c **********************************************/ diff --git a/db/sqlite3/src/sqlite3.h b/db/sqlite3/src/sqlite3.h index 023da28bd7b8a51478bc2be04f720fb79de0a809..d3f272c2b684b12c72322f27ca2d37f683df4b06 100644 --- a/db/sqlite3/src/sqlite3.h +++ b/db/sqlite3/src/sqlite3.h @@ -23,7 +23,7 @@ ** ** The official C-language API documentation for SQLite is derived ** from comments in this file. This file is the authoritative source -** on how SQLite interfaces are suppose to operate. +** on how SQLite interfaces are supposed to operate. ** ** The name of this file under configuration management is "sqlite.h.in". ** The makefile makes some minor changes to this file (such as inserting @@ -111,9 +111,9 @@ extern "C" { ** [sqlite3_libversion_number()], [sqlite3_sourceid()], ** [sqlite_version()] and [sqlite_source_id()]. */ -#define SQLITE_VERSION "3.8.10.1" -#define SQLITE_VERSION_NUMBER 3008010 -#define SQLITE_SOURCE_ID "2015-05-09 12:14:55 05b4b1f2a937c06c90db70c09890038f6c98ec40" +#define SQLITE_VERSION "3.8.11.1" +#define SQLITE_VERSION_NUMBER 3008011 +#define SQLITE_SOURCE_ID "2015-07-29 20:00:57 cf538e2783e468bbc25e7cb2a9ee64d3e0e80b2f" /* ** CAPI3REF: Run-Time Library Version Numbers @@ -963,6 +963,14 @@ struct sqlite3_io_methods { ** circumstances in order to fix a problem with priority inversion. ** Applications should <em>not</em> use this file-control. ** +** <li>[[SQLITE_FCNTL_ZIPVFS]] +** The [SQLITE_FCNTL_ZIPVFS] opcode is implemented by zipvfs only. All other +** VFS should return SQLITE_NOTFOUND for this opcode. +** +** <li>[[SQLITE_FCNTL_RBU]] +** The [SQLITE_FCNTL_RBU] opcode is implemented by the special VFS used by +** the RBU extension only. All other VFS should return SQLITE_NOTFOUND for +** this opcode. ** </ul> */ #define SQLITE_FCNTL_LOCKSTATE 1 @@ -988,6 +996,8 @@ struct sqlite3_io_methods { #define SQLITE_FCNTL_COMMIT_PHASETWO 22 #define SQLITE_FCNTL_WIN32_SET_HANDLE 23 #define SQLITE_FCNTL_WAL_BLOCK 24 +#define SQLITE_FCNTL_ZIPVFS 25 +#define SQLITE_FCNTL_RBU 26 /* deprecated names */ #define SQLITE_GET_LOCKPROXYFILE SQLITE_FCNTL_GET_LOCKPROXYFILE @@ -3390,7 +3400,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_stmt_busy(sqlite3_stmt*); ** Some interfaces require a protected sqlite3_value. Other interfaces ** will accept either a protected or an unprotected sqlite3_value. ** Every interface that accepts sqlite3_value arguments specifies -** whether or not it requires a protected sqlite3_value. +** whether or not it requires a protected sqlite3_value. The +** [sqlite3_value_dup()] interface can be used to construct a new +** protected sqlite3_value from an unprotected sqlite3_value. ** ** The terms "protected" and "unprotected" refer to whether or not ** a mutex is held. An internal mutex is held for a protected @@ -3550,6 +3562,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_bind_text64(sqlite3_stmt*, int, const char void(*)(void*), unsigned char encoding); SQLITE_API int SQLITE_STDCALL sqlite3_bind_value(sqlite3_stmt*, int, const sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob(sqlite3_stmt*, int, int n); +SQLITE_API int SQLITE_STDCALL sqlite3_bind_zeroblob64(sqlite3_stmt*, int, sqlite3_uint64); /* ** CAPI3REF: Number Of SQL Parameters @@ -3893,8 +3906,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** KEYWORDS: {column access functions} ** METHOD: sqlite3_stmt ** -** These routines form the "result set" interface. -** ** ^These routines return information about a single column of the current ** result row of a query. ^In every case the first argument is a pointer ** to the [prepared statement] that is being evaluated (the [sqlite3_stmt*] @@ -3954,13 +3965,14 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** even empty strings, are always zero-terminated. ^The return ** value from sqlite3_column_blob() for a zero-length BLOB is a NULL pointer. ** -** ^The object returned by [sqlite3_column_value()] is an -** [unprotected sqlite3_value] object. An unprotected sqlite3_value object -** may only be used with [sqlite3_bind_value()] and [sqlite3_result_value()]. +** <b>Warning:</b> ^The object returned by [sqlite3_column_value()] is an +** [unprotected sqlite3_value] object. In a multithreaded environment, +** an unprotected sqlite3_value object may only be used safely with +** [sqlite3_bind_value()] and [sqlite3_result_value()]. ** If the [unprotected sqlite3_value] object returned by ** [sqlite3_column_value()] is used in any other way, including calls ** to routines like [sqlite3_value_int()], [sqlite3_value_text()], -** or [sqlite3_value_bytes()], then the behavior is undefined. +** or [sqlite3_value_bytes()], the behavior is not threadsafe. ** ** These routines attempt to convert the value where appropriate. ^For ** example, if the internal representation is FLOAT and a text result @@ -3991,12 +4003,6 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** </table> ** </blockquote>)^ ** -** The table above makes reference to standard C library functions atoi() -** and atof(). SQLite does not really use these functions. It has its -** own equivalent internal routines. The atoi() and atof() names are -** used in the table for brevity and because they are familiar to most -** C programmers. -** ** Note that when type conversions occur, pointers returned by prior ** calls to sqlite3_column_blob(), sqlite3_column_text(), and/or ** sqlite3_column_text16() may be invalidated. @@ -4021,7 +4027,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** of conversion are done in place when it is possible, but sometimes they ** are not possible and in those cases prior pointers are invalidated. ** -** The safest and easiest to remember policy is to invoke these routines +** The safest policy is to invoke these routines ** in one of the following ways: ** ** <ul> @@ -4041,7 +4047,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_data_count(sqlite3_stmt *pStmt); ** ^The pointers returned are valid until a type conversion occurs as ** described above, or until [sqlite3_step()] or [sqlite3_reset()] or ** [sqlite3_finalize()] is called. ^The memory space used to hold strings -** and BLOBs is freed automatically. Do <b>not</b> pass the pointers returned +** and BLOBs is freed automatically. Do <em>not</em> pass the pointers returned ** from [sqlite3_column_blob()], [sqlite3_column_text()], etc. into ** [sqlite3_free()]. ** @@ -4291,12 +4297,12 @@ SQLITE_API SQLITE_DEPRECATED int SQLITE_STDCALL sqlite3_memory_alarm(void(*)(voi #endif /* -** CAPI3REF: Obtaining SQL Function Parameter Values +** CAPI3REF: Obtaining SQL Values ** METHOD: sqlite3_value ** ** The C-language implementation of SQL functions and aggregates uses ** this set of interface routines to access the parameter values on -** the function or aggregate. +** the function or aggregate. ** ** The xFunc (for scalar functions) or xStep (for aggregates) parameters ** to [sqlite3_create_function()] and [sqlite3_create_function16()] @@ -4349,6 +4355,23 @@ SQLITE_API const void *SQLITE_STDCALL sqlite3_value_text16be(sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_value_type(sqlite3_value*); SQLITE_API int SQLITE_STDCALL sqlite3_value_numeric_type(sqlite3_value*); +/* +** CAPI3REF: Copy And Free SQL Values +** METHOD: sqlite3_value +** +** ^The sqlite3_value_dup(V) interface makes a copy of the [sqlite3_value] +** object D and returns a pointer to that copy. ^The [sqlite3_value] returned +** is a [protected sqlite3_value] object even if the input is not. +** ^The sqlite3_value_dup(V) interface returns NULL if V is NULL or if a +** memory allocation fails. +** +** ^The sqlite3_value_free(V) interface frees an [sqlite3_value] object +** previously obtained from [sqlite3_value_dup()]. ^If V is a NULL pointer +** then sqlite3_value_free(V) is a harmless no-op. +*/ +SQLITE_API SQLITE_EXPERIMENTAL sqlite3_value *SQLITE_STDCALL sqlite3_value_dup(const sqlite3_value*); +SQLITE_API SQLITE_EXPERIMENTAL void SQLITE_STDCALL sqlite3_value_free(sqlite3_value*); + /* ** CAPI3REF: Obtain Aggregate Function Context ** METHOD: sqlite3_context @@ -4512,9 +4535,9 @@ typedef void (*sqlite3_destructor_type)(void*); ** to by the second parameter and which is N bytes long where N is the ** third parameter. ** -** ^The sqlite3_result_zeroblob() interfaces set the result of -** the application-defined function to be a BLOB containing all zero -** bytes and N bytes in size, where N is the value of the 2nd parameter. +** ^The sqlite3_result_zeroblob(C,N) and sqlite3_result_zeroblob64(C,N) +** interfaces set the result of the application-defined function to be +** a BLOB containing all zero bytes and N bytes in size. ** ** ^The sqlite3_result_double() interface sets the result from ** an application-defined function to be a floating point value specified @@ -4596,7 +4619,7 @@ typedef void (*sqlite3_destructor_type)(void*); ** from [sqlite3_malloc()] before it returns. ** ** ^The sqlite3_result_value() interface sets the result of -** the application-defined function to be a copy the +** the application-defined function to be a copy of the ** [unprotected sqlite3_value] object specified by the 2nd parameter. ^The ** sqlite3_result_value() interface makes a copy of the [sqlite3_value] ** so that the [sqlite3_value] specified in the parameter may change or @@ -4629,6 +4652,7 @@ SQLITE_API void SQLITE_STDCALL sqlite3_result_text16le(sqlite3_context*, const v SQLITE_API void SQLITE_STDCALL sqlite3_result_text16be(sqlite3_context*, const void*, int,void(*)(void*)); SQLITE_API void SQLITE_STDCALL sqlite3_result_value(sqlite3_context*, sqlite3_value*); SQLITE_API void SQLITE_STDCALL sqlite3_result_zeroblob(sqlite3_context*, int n); +SQLITE_API int SQLITE_STDCALL sqlite3_result_zeroblob64(sqlite3_context*, sqlite3_uint64 n); /* ** CAPI3REF: Define New Collating Sequences @@ -5872,7 +5896,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_blob_open( ** ** ^This function sets the database handle error code and message. */ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); +SQLITE_API int SQLITE_STDCALL sqlite3_blob_reopen(sqlite3_blob *, sqlite3_int64); /* ** CAPI3REF: Close A BLOB Handle @@ -6269,6 +6293,9 @@ SQLITE_API int SQLITE_STDCALL sqlite3_mutex_notheld(sqlite3_mutex*); #define SQLITE_MUTEX_STATIC_APP1 8 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP2 9 /* For use by application */ #define SQLITE_MUTEX_STATIC_APP3 10 /* For use by application */ +#define SQLITE_MUTEX_STATIC_VFS1 11 /* For use by built-in VFS */ +#define SQLITE_MUTEX_STATIC_VFS2 12 /* For use by extension VFS */ +#define SQLITE_MUTEX_STATIC_VFS3 13 /* For use by application VFS */ /* ** CAPI3REF: Retrieve the mutex for a database connection @@ -7682,7 +7709,7 @@ SQLITE_API int SQLITE_STDCALL sqlite3_vtab_on_conflict(sqlite3 *); ** ** See also: [sqlite3_stmt_scanstatus_reset()] */ -SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_stmt_scanstatus( +SQLITE_API int SQLITE_STDCALL sqlite3_stmt_scanstatus( sqlite3_stmt *pStmt, /* Prepared statement for which info desired */ int idx, /* Index of loop to report on */ int iScanStatusOp, /* Information desired. SQLITE_SCANSTAT_* */ @@ -7698,7 +7725,7 @@ SQLITE_API SQLITE_EXPERIMENTAL int SQLITE_STDCALL sqlite3_stmt_scanstatus( ** This API is only available if the library is built with pre-processor ** symbol [SQLITE_ENABLE_STMT_SCANSTATUS] defined. */ -SQLITE_API SQLITE_EXPERIMENTAL void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); +SQLITE_API void SQLITE_STDCALL sqlite3_stmt_scanstatus_reset(sqlite3_stmt*); /* @@ -7813,6 +7840,8 @@ struct sqlite3_rtree_query_info { int eParentWithin; /* Visibility of parent node */ int eWithin; /* OUT: Visiblity */ sqlite3_rtree_dbl rScore; /* OUT: Write the score here */ + /* The following fields are only available in 3.8.11 and later */ + sqlite3_value **apSqlParam; /* Original SQL values of parameters */ }; /*