1 |
Backport of upstream commit db7d62c8d5: |
2 |
|
3 |
Avoid attempting to mmap memory from an offset that is not a multiple of |
4 |
the system page size on systems with page sizes larger than 32KB. |
5 |
|
6 |
https://www.sqlite.org/src/info/db7d62c8d58eb1e8654a762c9b199ae4e2759038 |
7 |
|
8 |
Index: src/os_unix.c |
9 |
================================================================== |
10 |
--- src/os_unix.c |
11 |
+++ src/os_unix.c |
12 |
@@ -321,10 +321,11 @@ |
13 |
return geteuid() ? 0 : fchown(fd,uid,gid); |
14 |
} |
15 |
|
16 |
/* Forward reference */ |
17 |
static int openDirectory(const char*, int*); |
18 |
+static int unixGetpagesize(void); |
19 |
|
20 |
/* |
21 |
** Many system calls are accessed through pointer-to-functions so that |
22 |
** they may be overridden at runtime to facilitate fault injection during |
23 |
** testing and sandboxing. The following array holds the names and pointers |
24 |
@@ -443,10 +444,13 @@ |
25 |
{ "mremap", (sqlite3_syscall_ptr)mremap, 0 }, |
26 |
#else |
27 |
{ "mremap", (sqlite3_syscall_ptr)0, 0 }, |
28 |
#endif |
29 |
#define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) |
30 |
+ |
31 |
+ { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, |
32 |
+#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) |
33 |
|
34 |
}; /* End of the overrideable system calls */ |
35 |
|
36 |
/* |
37 |
** This is the xSetSystemCall() method of sqlite3_vfs for all of the |
38 |
@@ -4103,10 +4107,40 @@ |
39 |
#endif |
40 |
|
41 |
return rc; |
42 |
} |
43 |
|
44 |
+/* |
45 |
+** Return the system page size (getpagesize() if _BSD_SOURCE, else sysconf()). |
46 |
+** |
47 |
+** Do not call this function directly from other code in this file; use the |
48 |
+** osGetpagesize() macro so that overrides installed in aSyscall[] apply. |
49 |
+*/ |
50 |
+static int unixGetpagesize(void){ |
51 |
+#if defined(_BSD_SOURCE) |
52 |
+ return getpagesize(); |
53 |
+#else |
54 |
+ return (int)sysconf(_SC_PAGESIZE); |
55 |
+#endif |
56 |
+} |
57 |
+ |
58 |
+/* |
59 |
+** Return the minimum number of 32KB shm regions that should be mapped at |
60 |
+** a time, assuming that each mapping must be an integer multiple of the |
61 |
+** current system page-size (as reported by osGetpagesize()). |
62 |
+** |
63 |
+** The result is 1 when the page size is 32KB or less, and the page size |
64 |
+** divided by 32KB otherwise - e.g. systems configured to use 64KB pages |
65 |
+** must map at least two shm regions at a time. |
66 |
+*/ |
67 |
+static int unixShmRegionPerMap(void){ |
68 |
+ int shmsz = 32*1024; /* SHM region size */ |
69 |
+ int pgsz = osGetpagesize(); /* System page size */ |
70 |
+ assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ |
71 |
+ if( pgsz<shmsz ) return 1; |
72 |
+ return pgsz/shmsz; |
73 |
+} |
74 |
|
75 |
/* |
76 |
** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. |
77 |
** |
78 |
** This is not a VFS shared-memory method; it is a utility function called |
79 |
@@ -4114,14 +4148,15 @@ |
80 |
*/ |
81 |
static void unixShmPurge(unixFile *pFd){ |
82 |
unixShmNode *p = pFd->pInode->pShmNode; |
83 |
assert( unixMutexHeld() ); |
84 |
if( p && p->nRef==0 ){ |
85 |
+ int nShmPerMap = unixShmRegionPerMap(); |
86 |
int i; |
87 |
assert( p->pInode==pFd->pInode ); |
88 |
sqlite3_mutex_free(p->mutex); |
89 |
- for(i=0; i<p->nRegion; i++){ |
90 |
+ for(i=0; i<p->nRegion; i+=nShmPerMap){ |
91 |
if( p->h>=0 ){ |
92 |
osMunmap(p->apRegion[i], p->szRegion); |
93 |
}else{ |
94 |
sqlite3_free(p->apRegion[i]); |
95 |
} |
96 |
@@ -4324,10 +4359,12 @@ |
97 |
){ |
98 |
unixFile *pDbFd = (unixFile*)fd; |
99 |
unixShm *p; |
100 |
unixShmNode *pShmNode; |
101 |
int rc = SQLITE_OK; |
102 |
+ int nShmPerMap = unixShmRegionPerMap(); |
103 |
+ int nReqRegion; |
104 |
|
105 |
/* If the shared-memory file has not yet been opened, open it now. */ |
106 |
if( pDbFd->pShm==0 ){ |
107 |
rc = unixOpenSharedMemory(pDbFd); |
108 |
if( rc!=SQLITE_OK ) return rc; |
109 |
@@ -4339,13 +4376,16 @@ |
110 |
assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); |
111 |
assert( pShmNode->pInode==pDbFd->pInode ); |
112 |
assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); |
113 |
assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); |
114 |
|
115 |
- if( pShmNode->nRegion<=iRegion ){ |
116 |
+ /* Minimum number of regions required to be mapped. */ |
117 |
+ nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; |
118 |
+ |
119 |
+ if( pShmNode->nRegion<nReqRegion ){ |
120 |
char **apNew; /* New apRegion[] array */ |
121 |
- int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ |
122 |
+ int nByte = nReqRegion*szRegion; /* Minimum required file size */ |
123 |
struct stat sStat; /* Used by fstat() */ |
124 |
|
125 |
pShmNode->szRegion = szRegion; |
126 |
|
127 |
if( pShmNode->h>=0 ){ |
128 |
@@ -4390,21 +4430,23 @@ |
129 |
} |
130 |
} |
131 |
|
132 |
/* Map the requested memory region into this processes address space. */ |
133 |
apNew = (char **)sqlite3_realloc( |
134 |
- pShmNode->apRegion, (iRegion+1)*sizeof(char *) |
135 |
+ pShmNode->apRegion, nReqRegion*sizeof(char *) |
136 |
); |
137 |
if( !apNew ){ |
138 |
rc = SQLITE_IOERR_NOMEM; |
139 |
goto shmpage_out; |
140 |
} |
141 |
pShmNode->apRegion = apNew; |
142 |
- while(pShmNode->nRegion<=iRegion){ |
143 |
+ while( pShmNode->nRegion<nReqRegion ){ |
144 |
+ int nMap = szRegion*nShmPerMap; |
145 |
+ int i; |
146 |
void *pMem; |
147 |
if( pShmNode->h>=0 ){ |
148 |
- pMem = osMmap(0, szRegion, |
149 |
+ pMem = osMmap(0, nMap, |
150 |
pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, |
151 |
MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion |
152 |
); |
153 |
if( pMem==MAP_FAILED ){ |
154 |
rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); |
155 |
@@ -4416,12 +4458,15 @@ |
156 |
rc = SQLITE_NOMEM; |
157 |
goto shmpage_out; |
158 |
} |
159 |
memset(pMem, 0, szRegion); |
160 |
} |
161 |
- pShmNode->apRegion[pShmNode->nRegion] = pMem; |
162 |
- pShmNode->nRegion++; |
163 |
+ |
164 |
+ for(i=0; i<nShmPerMap; i++){ |
165 |
+ pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; |
166 |
+ } |
167 |
+ pShmNode->nRegion += nShmPerMap; |
168 |
} |
169 |
} |
170 |
|
171 |
shmpage_out: |
172 |
if( pShmNode->nRegion>iRegion ){ |
173 |
@@ -4631,25 +4676,10 @@ |
174 |
#endif |
175 |
} |
176 |
|
177 |
#if SQLITE_MAX_MMAP_SIZE>0 |
178 |
/* |
179 |
-** Return the system page size. |
180 |
-*/ |
181 |
-static int unixGetPagesize(void){ |
182 |
-#if HAVE_MREMAP |
183 |
- return 512; |
184 |
-#elif defined(_BSD_SOURCE) |
185 |
- return getpagesize(); |
186 |
-#else |
187 |
- return (int)sysconf(_SC_PAGESIZE); |
188 |
-#endif |
189 |
-} |
190 |
-#endif /* SQLITE_MAX_MMAP_SIZE>0 */ |
191 |
- |
192 |
-#if SQLITE_MAX_MMAP_SIZE>0 |
193 |
-/* |
194 |
** Attempt to set the size of the memory mapping maintained by file |
195 |
** descriptor pFd to nNew bytes. Any existing mapping is discarded. |
196 |
** |
197 |
** If successful, this function sets the following variables: |
198 |
** |
199 |
@@ -4680,12 +4712,16 @@ |
200 |
assert( MAP_FAILED!=0 ); |
201 |
|
202 |
if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; |
203 |
|
204 |
if( pOrig ){ |
205 |
- const int szSyspage = unixGetPagesize(); |
206 |
+#if HAVE_MREMAP |
207 |
+ i64 nReuse = pFd->mmapSize; |
208 |
+#else |
209 |
+ const int szSyspage = osGetpagesize(); |
210 |
i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); |
211 |
+#endif |
212 |
u8 *pReq = &pOrig[nReuse]; |
213 |
|
214 |
/* Unmap any pages of the existing mapping that cannot be reused. */ |
215 |
if( nReuse!=nOrig ){ |
216 |
osMunmap(pReq, nOrig-nReuse); |
217 |
@@ -7427,11 +7463,11 @@ |
218 |
}; |
219 |
unsigned int i; /* Loop counter */ |
220 |
|
221 |
/* Double-check that the aSyscall[] array has been constructed |
222 |
** correctly. See ticket [bb3a86e890c8e96ab] */ |
223 |
- assert( ArraySize(aSyscall)==24 ); |
224 |
+ assert( ArraySize(aSyscall)==25 ); |
225 |
|
226 |
/* Register all VFSes defined in the aVfs[] array */ |
227 |
for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ |
228 |
sqlite3_vfs_register(&aVfs[i], i==0); |
229 |
} |
230 |
|
231 |
Index: src/test_syscall.c |
232 |
================================================================== |
233 |
--- src/test_syscall.c |
234 |
+++ src/test_syscall.c |
235 |
@@ -65,10 +65,15 @@ |
236 |
** Return true if the named system call exists. Or false otherwise. |
237 |
** |
238 |
** test_syscall list |
239 |
** Return a list of all system calls. The list is constructed using |
240 |
** the xNextSystemCall() VFS method. |
241 |
+** |
242 |
+** test_syscall pagesize PGSZ |
243 |
+** If PGSZ is a power of two greater than 256, install a wrapper around |
244 |
+** OS function getpagesize() that reports the system page size as PGSZ. |
245 |
+** Or, if PGSZ is less than zero, remove any wrapper already installed. |
246 |
*/ |
247 |
|
248 |
#include "sqliteInt.h" |
249 |
#include "sqlite3.h" |
250 |
#include "tcl.h" |
251 |
@@ -87,11 +92,13 @@ |
252 |
|
253 |
static struct TestSyscallGlobal { |
254 |
int bPersist; /* 1 for persistent errors, 0 for transient */ |
255 |
int nCount; /* Fail after this many more calls */ |
256 |
int nFail; /* Number of failures that have occurred */ |
257 |
-} gSyscall = { 0, 0 }; |
258 |
+ int pgsz; /* Page size reported by ts_getpagesize() */ |
259 |
+ sqlite3_syscall_ptr orig_getpagesize; /* Saved getpagesize pointer, or 0 */ |
260 |
+} gSyscall = { 0, 0, 0, 0, 0 }; |
261 |
|
262 |
static int ts_open(const char *, int, int); |
263 |
static int ts_close(int fd); |
264 |
static int ts_access(const char *zPath, int mode); |
265 |
static char *ts_getcwd(char *zPath, size_t nPath); |
266 |
@@ -647,10 +654,49 @@ |
267 |
|
268 |
pVfs = sqlite3_vfs_find(0); |
269 |
Tcl_SetObjResult(interp, Tcl_NewStringObj(pVfs->zName, -1)); |
270 |
return TCL_OK; |
271 |
} |
272 |
+ |
273 |
+static int ts_getpagesize(void){ |
274 |
+ return gSyscall.pgsz; /* Page size set by test_syscall_pagesize() */ |
275 |
+} |
276 |
+/* "test_syscall pagesize PGSZ": install (PGSZ>=512) or remove (PGSZ<0) the getpagesize wrapper. */ |
277 |
+static int test_syscall_pagesize( |
278 |
+ void * clientData, |
279 |
+ Tcl_Interp *interp, |
280 |
+ int objc, |
281 |
+ Tcl_Obj *CONST objv[] |
282 |
+){ |
283 |
+ sqlite3_vfs *pVfs = sqlite3_vfs_find(0); |
284 |
+ int pgsz; |
285 |
+ if( objc!=3 ){ |
286 |
+ Tcl_WrongNumArgs(interp, 2, objv, "PGSZ"); |
287 |
+ return TCL_ERROR; |
288 |
+ } |
289 |
+ if( Tcl_GetIntFromObj(interp, objv[2], &pgsz) ){ |
290 |
+ return TCL_ERROR; |
291 |
+ } |
292 |
+ |
293 |
+ if( pgsz<0 ){ /* Negative PGSZ: put back the saved implementation, if any */ |
294 |
+ if( gSyscall.orig_getpagesize ){ |
295 |
+ pVfs->xSetSystemCall(pVfs, "getpagesize", gSyscall.orig_getpagesize); |
296 |
+ } |
297 |
+ }else{ |
298 |
+ sqlite3_syscall_ptr xCur = pVfs->xGetSystemCall(pVfs, "getpagesize"); |
299 |
+ if( pgsz<512 || (pgsz & (pgsz-1)) ){ /* Must be a power of two >= 512 */ |
300 |
+ Tcl_AppendResult(interp, "pgsz out of range", 0); |
301 |
+ return TCL_ERROR; |
302 |
+ } |
303 |
+ /* Never record our own wrapper as the original on a repeated install. */ |
304 |
+ if( xCur!=(sqlite3_syscall_ptr)ts_getpagesize ) gSyscall.orig_getpagesize = xCur; |
305 |
+ gSyscall.pgsz = pgsz; |
306 |
+ pVfs->xSetSystemCall(pVfs, "getpagesize", (sqlite3_syscall_ptr)ts_getpagesize); |
307 |
+ } |
308 |
+ |
309 |
+ return TCL_OK; |
310 |
+} |
311 |
|
312 |
static int test_syscall( |
313 |
void * clientData, |
314 |
Tcl_Interp *interp, |
315 |
int objc, |
316 |
@@ -666,10 +712,11 @@ |
317 |
{ "reset", test_syscall_reset }, |
318 |
{ "errno", test_syscall_errno }, |
319 |
{ "exists", test_syscall_exists }, |
320 |
{ "list", test_syscall_list }, |
321 |
{ "defaultvfs", test_syscall_defaultvfs }, |
322 |
+ { "pagesize", test_syscall_pagesize }, |
323 |
{ 0, 0 } |
324 |
}; |
325 |
int iCmd; |
326 |
int rc; |
327 |
|
328 |
|
329 |
Index: test/syscall.test |
330 |
================================================================== |
331 |
--- test/syscall.test |
332 |
+++ test/syscall.test |
333 |
@@ -59,10 +59,11 @@ |
334 |
foreach s { |
335 |
open close access getcwd stat fstat ftruncate |
336 |
fcntl read pread write pwrite fchmod fallocate |
337 |
pread64 pwrite64 unlink openDirectory mkdir rmdir |
338 |
statvfs fchown umask mmap munmap mremap |
339 |
+ getpagesize |
340 |
} { |
341 |
if {[test_syscall exists $s]} {lappend syscall_list $s} |
342 |
} |
343 |
do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] |
344 |
|
345 |
|
346 |
ADDED test/wal64k.test |
347 |
Index: test/wal64k.test |
348 |
================================================================== |
349 |
--- test/wal64k.test |
350 |
+++ test/wal64k.test |
351 |
@@ -0,0 +1,47 @@ |
352 |
+# 2010 April 13 |
353 |
+# |
354 |
+# The author disclaims copyright to this source code. In place of |
355 |
+# a legal notice, here is a blessing: |
356 |
+# |
357 |
+# May you do good and not evil. |
358 |
+# May you find forgiveness for yourself and forgive others. |
359 |
+# May you share freely, never taking more than you give. |
360 |
+# |
361 |
+#*********************************************************************** |
362 |
+# This file implements regression tests for the SQLite library. The |
363 |
+# focus of this file is testing "PRAGMA journal_mode=WAL" mode when the |
364 |
+# system page size is reported as 64KB via "test_syscall pagesize". |
365 |
+# |
366 |
+ |
367 |
+set testdir [file dirname $argv0] |
368 |
+source $testdir/tester.tcl |
369 |
+set testprefix wal64k |
370 |
+ |
371 |
+ifcapable !wal {finish_test ; return } |
372 |
+ |
373 |
+db close |
374 |
+test_syscall pagesize 65536 |
375 |
+sqlite3 db test.db |
376 |
+ |
377 |
+do_execsql_test 1.0 { |
378 |
+ PRAGMA journal_mode = WAL; |
379 |
+ CREATE TABLE t1(x); |
380 |
+ CREATE INDEX i1 ON t1(x); |
381 |
+} {wal} |
382 |
+do_test 1.1 { file size test.db-shm } {65536} |
383 |
+ |
384 |
+do_test 1.2 { |
385 |
+ execsql BEGIN |
386 |
+ while {[file size test.db-shm]==65536} { |
387 |
+ execsql { INSERT INTO t1 VALUES( randstr(900,1100) ) } |
388 |
+ } |
389 |
+ execsql COMMIT |
390 |
+ file size test.db-shm |
391 |
+} {131072} |
392 |
+ |
393 |
+integrity_check 1.3 |
394 |
+ |
395 |
+db close |
396 |
+test_syscall pagesize -1 |
397 |
+finish_test |
398 |
+ |
399 |
|