1 |
jpp |
1.1 |
Backport of upstream commit db7d62c8d5: |
2 |
|
|
|
3 |
|
|
Avoid attempting to mmap the shared-memory (shm) file from an offset that is not a multiple of |
4 |
|
|
the system page size on systems with page sizes larger than 32KB, the size of one shm region. |
5 |
|
|
|
6 |
|
|
https://www.sqlite.org/src/info/db7d62c8d58eb1e8654a762c9b199ae4e2759038 |
7 |
|
|
|
8 |
|
|
Index: src/os_unix.c |
9 |
|
|
================================================================== |
10 |
|
|
--- src/os_unix.c |
11 |
|
|
+++ src/os_unix.c |
12 |
|
|
@@ -321,10 +321,11 @@ |
13 |
|
|
return geteuid() ? 0 : fchown(fd,uid,gid); |
14 |
|
|
} |
15 |
|
|
|
16 |
|
|
/* Forward reference */ |
17 |
|
|
static int openDirectory(const char*, int*); |
18 |
|
|
+static int unixGetpagesize(void); |
19 |
|
|
|
20 |
|
|
/* |
21 |
|
|
** Many system calls are accessed through pointer-to-functions so that |
22 |
|
|
** they may be overridden at runtime to facilitate fault injection during |
23 |
|
|
** testing and sandboxing. The following array holds the names and pointers |
24 |
|
|
@@ -443,10 +444,13 @@ |
25 |
|
|
{ "mremap", (sqlite3_syscall_ptr)mremap, 0 }, |
26 |
|
|
#else |
27 |
|
|
{ "mremap", (sqlite3_syscall_ptr)0, 0 }, |
28 |
|
|
#endif |
29 |
|
|
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) |
30 |
|
|
+ |
31 |
|
|
+ { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, |
32 |
|
|
+#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) |
33 |
|
|
|
34 |
|
|
}; /* End of the overrideable system calls */ |
35 |
|
|
|
36 |
|
|
/* |
37 |
|
|
** This is the xSetSystemCall() method of sqlite3_vfs for all of the |
38 |
|
|
@@ -4103,10 +4107,40 @@ |
39 |
|
|
#endif |
40 |
|
|
|
41 |
|
|
return rc; |
42 |
|
|
} |
43 |
|
|
|
44 |
|
|
+/* |
45 |
|
|
+** Return the system page size. |
46 |
|
|
+** |
47 |
|
|
+** This function should not be called directly by other code in this file. |
48 |
|
|
+** Instead, it should be called via macro osGetpagesize(). |
49 |
|
|
+*/ |
50 |
|
|
+static int unixGetpagesize(void){ |
51 |
|
|
+#if defined(_BSD_SOURCE) |
52 |
|
|
+ return getpagesize(); |
53 |
|
|
+#else |
54 |
|
|
+ return (int)sysconf(_SC_PAGESIZE); |
55 |
|
|
+#endif |
56 |
|
|
+} |
57 |
|
|
+ |
58 |
|
|
+/* |
59 |
|
|
+** Return the minimum number of 32KB shm regions that should be mapped at |
60 |
|
|
+** a time, assuming that each mapping must be an integer multiple of the |
61 |
|
|
+** current system page-size. |
62 |
|
|
+** |
63 |
|
|
+** Usually, this is 1. The exception seems to be systems that are configured |
64 |
|
|
+** to use 64KB pages - in this case each mapping must cover at least two |
65 |
|
|
+** shm regions. |
66 |
|
|
+*/ |
67 |
|
|
+static int unixShmRegionPerMap(void){ |
68 |
|
|
+ int shmsz = 32*1024; /* SHM region size */ |
69 |
|
|
+ int pgsz = osGetpagesize(); /* System page size */ |
70 |
|
|
+ assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ |
71 |
|
|
+ if( pgsz<shmsz ) return 1; |
72 |
|
|
+ return pgsz/shmsz; |
73 |
|
|
+} |
74 |
|
|
|
75 |
|
|
/* |
76 |
|
|
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. |
77 |
|
|
** |
78 |
|
|
** This is not a VFS shared-memory method; it is a utility function called |
79 |
|
|
@@ -4114,14 +4148,15 @@ |
80 |
|
|
*/ |
81 |
|
|
static void unixShmPurge(unixFile *pFd){ |
82 |
|
|
unixShmNode *p = pFd->pInode->pShmNode; |
83 |
|
|
assert( unixMutexHeld() ); |
84 |
|
|
if( p && p->nRef==0 ){ |
85 |
|
|
+ int nShmPerMap = unixShmRegionPerMap(); |
86 |
|
|
int i; |
87 |
|
|
assert( p->pInode==pFd->pInode ); |
88 |
|
|
sqlite3_mutex_free(p->mutex); |
89 |
|
|
- for(i=0; i<p->nRegion; i++){ |
90 |
|
|
+ for(i=0; i<p->nRegion; i+=nShmPerMap){ |
91 |
|
|
if( p->h>=0 ){ |
92 |
|
|
osMunmap(p->apRegion[i], p->szRegion); |
93 |
|
|
}else{ |
94 |
|
|
sqlite3_free(p->apRegion[i]); |
95 |
|
|
} |
96 |
|
|
@@ -4324,10 +4359,12 @@ |
97 |
|
|
){ |
98 |
|
|
unixFile *pDbFd = (unixFile*)fd; |
99 |
|
|
unixShm *p; |
100 |
|
|
unixShmNode *pShmNode; |
101 |
|
|
int rc = SQLITE_OK; |
102 |
|
|
+ int nShmPerMap = unixShmRegionPerMap(); |
103 |
|
|
+ int nReqRegion; |
104 |
|
|
|
105 |
|
|
/* If the shared-memory file has not yet been opened, open it now. */ |
106 |
|
|
if( pDbFd->pShm==0 ){ |
107 |
|
|
rc = unixOpenSharedMemory(pDbFd); |
108 |
|
|
if( rc!=SQLITE_OK ) return rc; |
109 |
|
|
@@ -4339,13 +4376,16 @@ |
110 |
|
|
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); |
111 |
|
|
assert( pShmNode->pInode==pDbFd->pInode ); |
112 |
|
|
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); |
113 |
|
|
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); |
114 |
|
|
|
115 |
|
|
- if( pShmNode->nRegion<=iRegion ){ |
116 |
|
|
+ /* Minimum number of regions required to be mapped. */ |
117 |
|
|
+ nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; |
118 |
|
|
+ |
119 |
|
|
+ if( pShmNode->nRegion<nReqRegion ){ |
120 |
|
|
char **apNew; /* New apRegion[] array */ |
121 |
|
|
- int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ |
122 |
|
|
+ int nByte = nReqRegion*szRegion; /* Minimum required file size */ |
123 |
|
|
struct stat sStat; /* Used by fstat() */ |
124 |
|
|
|
125 |
|
|
pShmNode->szRegion = szRegion; |
126 |
|
|
|
127 |
|
|
if( pShmNode->h>=0 ){ |
128 |
|
|
@@ -4390,21 +4430,23 @@ |
129 |
|
|
} |
130 |
|
|
} |
131 |
|
|
|
132 |
|
|
/* Map the requested memory region into this processes address space. */ |
133 |
|
|
apNew = (char **)sqlite3_realloc( |
134 |
|
|
- pShmNode->apRegion, (iRegion+1)*sizeof(char *) |
135 |
|
|
+ pShmNode->apRegion, nReqRegion*sizeof(char *) |
136 |
|
|
); |
137 |
|
|
if( !apNew ){ |
138 |
|
|
rc = SQLITE_IOERR_NOMEM; |
139 |
|
|
goto shmpage_out; |
140 |
|
|
} |
141 |
|
|
pShmNode->apRegion = apNew; |
142 |
|
|
- while(pShmNode->nRegion<=iRegion){ |
143 |
|
|
+ while( pShmNode->nRegion<nReqRegion ){ |
144 |
|
|
+ int nMap = szRegion*nShmPerMap; |
145 |
|
|
+ int i; |
146 |
|
|
void *pMem; |
147 |
|
|
if( pShmNode->h>=0 ){ |
148 |
|
|
- pMem = osMmap(0, szRegion, |
149 |
|
|
+ pMem = osMmap(0, nMap, |
150 |
|
|
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, |
151 |
|
|
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion |
152 |
|
|
); |
153 |
|
|
if( pMem==MAP_FAILED ){ |
154 |
|
|
rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); |
155 |
|
|
@@ -4416,12 +4458,15 @@ |
156 |
|
|
rc = SQLITE_NOMEM; |
157 |
|
|
goto shmpage_out; |
158 |
|
|
} |
159 |
|
|
memset(pMem, 0, szRegion); |
160 |
|
|
} |
161 |
|
|
- pShmNode->apRegion[pShmNode->nRegion] = pMem; |
162 |
|
|
- pShmNode->nRegion++; |
163 |
|
|
+ |
164 |
|
|
+ for(i=0; i<nShmPerMap; i++){ |
165 |
|
|
+ pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; |
166 |
|
|
+ } |
167 |
|
|
+ pShmNode->nRegion += nShmPerMap; |
168 |
|
|
} |
169 |
|
|
} |
170 |
|
|
|
171 |
|
|
shmpage_out: |
172 |
|
|
if( pShmNode->nRegion>iRegion ){ |
173 |
|
|
@@ -4631,25 +4676,10 @@ |
174 |
|
|
#endif |
175 |
|
|
} |
176 |
|
|
|
177 |
|
|
#if SQLITE_MAX_MMAP_SIZE>0 |
178 |
|
|
/* |
179 |
|
|
-** Return the system page size. |
180 |
|
|
-*/ |
181 |
|
|
-static int unixGetPagesize(void){ |
182 |
|
|
-#if HAVE_MREMAP |
183 |
|
|
- return 512; |
184 |
|
|
-#elif defined(_BSD_SOURCE) |
185 |
|
|
- return getpagesize(); |
186 |
|
|
-#else |
187 |
|
|
- return (int)sysconf(_SC_PAGESIZE); |
188 |
|
|
-#endif |
189 |
|
|
-} |
190 |
|
|
-#endif /* SQLITE_MAX_MMAP_SIZE>0 */ |
191 |
|
|
- |
192 |
|
|
-#if SQLITE_MAX_MMAP_SIZE>0 |
193 |
|
|
-/* |
194 |
|
|
** Attempt to set the size of the memory mapping maintained by file |
195 |
|
|
** descriptor pFd to nNew bytes. Any existing mapping is discarded. |
196 |
|
|
** |
197 |
|
|
** If successful, this function sets the following variables: |
198 |
|
|
** |
199 |
|
|
@@ -4680,12 +4712,16 @@ |
200 |
|
|
assert( MAP_FAILED!=0 ); |
201 |
|
|
|
202 |
|
|
if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; |
203 |
|
|
|
204 |
|
|
if( pOrig ){ |
205 |
|
|
- const int szSyspage = unixGetPagesize(); |
206 |
|
|
+#if HAVE_MREMAP |
207 |
|
|
+ i64 nReuse = pFd->mmapSize; |
208 |
|
|
+#else |
209 |
|
|
+ const int szSyspage = osGetpagesize(); |
210 |
|
|
i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); |
211 |
|
|
+#endif |
212 |
|
|
u8 *pReq = &pOrig[nReuse]; |
213 |
|
|
|
214 |
|
|
/* Unmap any pages of the existing mapping that cannot be reused. */ |
215 |
|
|
if( nReuse!=nOrig ){ |
216 |
|
|
osMunmap(pReq, nOrig-nReuse); |
217 |
|
|
@@ -7427,11 +7463,11 @@ |
218 |
|
|
}; |
219 |
|
|
unsigned int i; /* Loop counter */ |
220 |
|
|
|
221 |
|
|
/* Double-check that the aSyscall[] array has been constructed |
222 |
|
|
** correctly. See ticket [bb3a86e890c8e96ab] */ |
223 |
|
|
- assert( ArraySize(aSyscall)==24 ); |
224 |
|
|
+ assert( ArraySize(aSyscall)==25 ); |
225 |
|
|
|
226 |
|
|
/* Register all VFSes defined in the aVfs[] array */ |
227 |
|
|
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |
228 |
|
|
sqlite3_vfs_register(&aVfs[i], i==0); |
229 |
|
|
} |
230 |
|
|
|
231 |
|
|
Index: src/test_syscall.c |
232 |
|
|
================================================================== |
233 |
|
|
--- src/test_syscall.c |
234 |
|
|
+++ src/test_syscall.c |
235 |
|
|
@@ -65,10 +65,15 @@ |
236 |
|
|
** Return true if the named system call exists. Or false otherwise. |
237 |
|
|
** |
238 |
|
|
** test_syscall list |
239 |
|
|
** Return a list of all system calls. The list is constructed using |
240 |
|
|
** the xNextSystemCall() VFS method. |
241 |
|
|
+** |
242 |
|
|
+** test_syscall pagesize PGSZ |
243 |
|
|
+** If PGSZ is a power of two greater than 256, install a wrapper around |
244 |
|
|
+** OS function getpagesize() that reports the system page size as PGSZ. |
245 |
|
|
+** Or, if PGSZ is less than zero, remove any wrapper already installed. |
246 |
|
|
*/ |
247 |
|
|
|
248 |
|
|
#include "sqliteInt.h" |
249 |
|
|
#include "sqlite3.h" |
250 |
|
|
#include "tcl.h" |
251 |
|
|
@@ -87,11 +92,13 @@ |
252 |
|
|
|
253 |
|
|
static struct TestSyscallGlobal { |
254 |
|
|
int bPersist; /* 1 for persistent errors, 0 for transient */ |
255 |
|
|
int nCount; /* Fail after this many more calls */ |
256 |
|
|
int nFail; /* Number of failures that have occurred */ |
257 |
|
|
-} gSyscall = { 0, 0 }; |
258 |
|
|
+ int pgsz; |
259 |
|
|
+ sqlite3_syscall_ptr orig_getpagesize; |
260 |
|
|
+} gSyscall = { 0, 0, 0, 0, 0 }; |
261 |
|
|
|
262 |
|
|
static int ts_open(const char *, int, int); |
263 |
|
|
static int ts_close(int fd); |
264 |
|
|
static int ts_access(const char *zPath, int mode); |
265 |
|
|
static char *ts_getcwd(char *zPath, size_t nPath); |
266 |
|
|
@@ -647,10 +654,49 @@ |
267 |
|
|
|
268 |
|
|
pVfs = sqlite3_vfs_find(0); |
269 |
|
|
Tcl_SetObjResult(interp, Tcl_NewStringObj(pVfs->zName, -1)); |
270 |
|
|
return TCL_OK; |
271 |
|
|
} |
272 |
|
|
+ |
273 |
|
|
+static int ts_getpagesize(void){ |
274 |
|
|
+ return gSyscall.pgsz; |
275 |
|
|
+} |
276 |
|
|
+ |
277 |
|
|
+static int test_syscall_pagesize( |
278 |
|
|
+ void * clientData, |
279 |
|
|
+ Tcl_Interp *interp, |
280 |
|
|
+ int objc, |
281 |
|
|
+ Tcl_Obj *CONST objv[] |
282 |
|
|
+){ |
283 |
|
|
+ sqlite3_vfs *pVfs = sqlite3_vfs_find(0); |
284 |
|
|
+ int pgsz; |
285 |
|
|
+ if( objc!=3 ){ |
286 |
|
|
+ Tcl_WrongNumArgs(interp, 2, objv, "PGSZ"); |
287 |
|
|
+ return TCL_ERROR; |
288 |
|
|
+ } |
289 |
|
|
+ if( Tcl_GetIntFromObj(interp, objv[2], &pgsz) ){ |
290 |
|
|
+ return TCL_ERROR; |
291 |
|
|
+ } |
292 |
|
|
+ |
293 |
|
|
+ if( pgsz<0 ){ |
294 |
|
|
+ if( gSyscall.orig_getpagesize ){ |
295 |
|
|
+ pVfs->xSetSystemCall(pVfs, "getpagesize", gSyscall.orig_getpagesize); |
296 |
|
|
+ } |
297 |
|
|
+ }else{ |
298 |
|
|
+ if( pgsz<512 || (pgsz & (pgsz-1)) ){ |
299 |
|
|
+ Tcl_AppendResult(interp, "pgsz out of range", 0); |
300 |
|
|
+ return TCL_ERROR; |
301 |
|
|
+ } |
302 |
|
|
+ gSyscall.orig_getpagesize = pVfs->xGetSystemCall(pVfs, "getpagesize"); |
303 |
|
|
+ gSyscall.pgsz = pgsz; |
304 |
|
|
+ pVfs->xSetSystemCall( |
305 |
|
|
+ pVfs, "getpagesize", (sqlite3_syscall_ptr)ts_getpagesize |
306 |
|
|
+ ); |
307 |
|
|
+ } |
308 |
|
|
+ |
309 |
|
|
+ return TCL_OK; |
310 |
|
|
+} |
311 |
|
|
|
312 |
|
|
static int test_syscall( |
313 |
|
|
void * clientData, |
314 |
|
|
Tcl_Interp *interp, |
315 |
|
|
int objc, |
316 |
|
|
@@ -666,10 +712,11 @@ |
317 |
|
|
{ "reset", test_syscall_reset }, |
318 |
|
|
{ "errno", test_syscall_errno }, |
319 |
|
|
{ "exists", test_syscall_exists }, |
320 |
|
|
{ "list", test_syscall_list }, |
321 |
|
|
{ "defaultvfs", test_syscall_defaultvfs }, |
322 |
|
|
+ { "pagesize", test_syscall_pagesize }, |
323 |
|
|
{ 0, 0 } |
324 |
|
|
}; |
325 |
|
|
int iCmd; |
326 |
|
|
int rc; |
327 |
|
|
|
328 |
|
|
|
329 |
|
|
Index: test/syscall.test |
330 |
|
|
================================================================== |
331 |
|
|
--- test/syscall.test |
332 |
|
|
+++ test/syscall.test |
333 |
|
|
@@ -59,10 +59,11 @@ |
334 |
|
|
foreach s { |
335 |
|
|
open close access getcwd stat fstat ftruncate |
336 |
|
|
fcntl read pread write pwrite fchmod fallocate |
337 |
|
|
pread64 pwrite64 unlink openDirectory mkdir rmdir |
338 |
|
|
statvfs fchown umask mmap munmap mremap |
339 |
|
|
+ getpagesize |
340 |
|
|
} { |
341 |
|
|
if {[test_syscall exists $s]} {lappend syscall_list $s} |
342 |
|
|
} |
343 |
|
|
do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] |
344 |
|
|
|
345 |
|
|
|
346 |
|
|
ADDED test/wal64k.test |
347 |
|
|
Index: test/wal64k.test |
348 |
|
|
================================================================== |
349 |
|
|
--- test/wal64k.test |
350 |
|
|
+++ test/wal64k.test |
351 |
|
|
@@ -0,0 +1,47 @@ |
352 |
|
|
+# 2010 April 13 |
353 |
|
|
+# |
354 |
|
|
+# The author disclaims copyright to this source code. In place of |
355 |
|
|
+# a legal notice, here is a blessing: |
356 |
|
|
+# |
357 |
|
|
+# May you do good and not evil. |
358 |
|
|
+# May you find forgiveness for yourself and forgive others. |
359 |
|
|
+# May you share freely, never taking more than you give. |
360 |
|
|
+# |
361 |
|
|
+#*********************************************************************** |
362 |
|
|
+# This file implements regression tests for SQLite library. The |
363 |
|
|
+# focus of this file is testing the operation of the library in |
364 |
|
|
+# "PRAGMA journal_mode=WAL" mode. |
365 |
|
|
+# |
366 |
|
|
+ |
367 |
|
|
+set testdir [file dirname $argv0] |
368 |
|
|
+source $testdir/tester.tcl |
369 |
|
|
+set testprefix wal64k |
370 |
|
|
+ |
371 |
|
|
+ifcapable !wal {finish_test ; return } |
372 |
|
|
+ |
373 |
|
|
+db close |
374 |
|
|
+test_syscall pagesize 65536 |
375 |
|
|
+sqlite3 db test.db |
376 |
|
|
+ |
377 |
|
|
+do_execsql_test 1.0 { |
378 |
|
|
+ PRAGMA journal_mode = WAL; |
379 |
|
|
+ CREATE TABLE t1(x); |
380 |
|
|
+ CREATE INDEX i1 ON t1(x); |
381 |
|
|
+} {wal} |
382 |
|
|
+do_test 1.1 { file size test.db-shm } {65536} |
383 |
|
|
+ |
384 |
|
|
+do_test 1.2 { |
385 |
|
|
+ execsql BEGIN |
386 |
|
|
+ while {[file size test.db-shm]==65536} { |
387 |
|
|
+ execsql { INSERT INTO t1 VALUES( randstr(900,1100) ) } |
388 |
|
|
+ } |
389 |
|
|
+ execsql COMMIT |
390 |
|
|
+ file size test.db-shm |
391 |
|
|
+} {131072} |
392 |
|
|
+ |
393 |
|
|
+integrity_check 1.3 |
394 |
|
|
+ |
395 |
|
|
+db close |
396 |
|
|
+test_syscall pagesize -1 |
397 |
|
|
+finish_test |
398 |
|
|
+ |
399 |
|
|
|