152a38012Sejakowatz/*
24535495dSIngo Weinhold * Copyright 2010-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
486b5aa8dSAxel Dörfler * Distributed under the terms of the MIT License.
586b5aa8dSAxel Dörfler *
686b5aa8dSAxel Dörfler * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
786b5aa8dSAxel Dörfler * Distributed under the terms of the NewOS License.
886b5aa8dSAxel Dörfler */
986b5aa8dSAxel Dörfler
105800e8a4SIngo Weinhold
116d4aea47SAxel Dörfler#include <string.h>
126d4aea47SAxel Dörfler#include <stdlib.h>
134544e733SAxel Dörfler
14cff6e9e4SIngo Weinhold#include <algorithm>
15cff6e9e4SIngo Weinhold
168af4cf5fSAxel Dörfler#include <KernelExport.h>
178af4cf5fSAxel Dörfler#include <OS.h>
188af4cf5fSAxel Dörfler
19ae21ddafSIngo Weinhold#include <AutoDeleter.h>
20ae21ddafSIngo Weinhold
2152a38012Sejakowatz#include <arch/cpu.h>
22279c6b76SIngo Weinhold#include <arch/vm_translation_map.h>
238d12bd13SIngo Weinhold#include <block_cache.h>
24279c6b76SIngo Weinhold#include <boot/kernel_args.h>
25279c6b76SIngo Weinhold#include <condition_variable.h>
2690c6930eSMichael Lotz#include <elf.h>
271af7d115SMichael Lotz#include <heap.h>
28279c6b76SIngo Weinhold#include <kernel.h>
295c99d639SIngo Weinhold#include <low_resource_manager.h>
3053d43e3fSAxel Dörfler#include <thread.h>
3182d444a2SIngo Weinhold#include <tracing.h>
326d4aea47SAxel Dörfler#include <util/AutoLock.h>
332a79a768SIngo Weinhold#include <vfs.h>
34e50cf876SIngo Weinhold#include <vm/vm.h>
35e50cf876SIngo Weinhold#include <vm/vm_priv.h>
36e50cf876SIngo Weinhold#include <vm/vm_page.h>
37e50cf876SIngo Weinhold#include <vm/VMAddressSpace.h>
38f34a1dd5SIngo Weinhold#include <vm/VMArea.h>
39e50cf876SIngo Weinhold#include <vm/VMCache.h>
4052a38012Sejakowatz
41aa4ba93eSIngo Weinhold#include "IORequest.h"
42a6778735SAxel Dörfler#include "PageCacheLocker.h"
435800e8a4SIngo Weinhold#include "VMAnonymousCache.h"
445800e8a4SIngo Weinhold#include "VMPageQueue.h"
45a6778735SAxel Dörfler
4652a38012Sejakowatz
// General debug output for this file. Uncomment the following line to enable
// it. TRACE() takes a complete, parenthesized dprintf() argument list, i.e.
// it is invoked as TRACE(("format %d\n", value));
//#define TRACE_VM_PAGE
#ifdef TRACE_VM_PAGE
#	define TRACE(x) dprintf x
#else
	// Expand to a real (empty) statement rather than to a bare ';'. Together
	// with the caller's own ';' a bare ';' would yield two statements, which
	// breaks an unbraced "if (...) TRACE((...)); else ...". This also matches
	// the disabled form of TRACE_DAEMON().
#	define TRACE(x) do {} while (false)
#endif
53be84cd39SAxel Dörfler
// Debug output for the VM daemons. Uncomment the following line to have
// TRACE_DAEMON() forward its arguments to dprintf(); otherwise it expands to
// an empty statement.
//#define TRACE_VM_DAEMONS
#ifndef TRACE_VM_DAEMONS
#	define TRACE_DAEMON(x...) do {} while (false)
#else
#	define TRACE_DAEMON(x...) dprintf(x)
#endif
6040bb9481SIngo Weinhold
6140bb9481SIngo Weinhold//#define TRACK_PAGE_USAGE_STATS	1
6240bb9481SIngo Weinhold
// Assertion helper for page related checks: forwards the expression to
// ASSERT_PRINT() and passes the page pointer as the argument for the
// "page: %p" format string.
#define PAGE_ASSERT(pagePointer, expression)	\
	ASSERT_PRINT((expression), "page: %p", (pagePointer))
654a1f6683SIngo Weinhold
// Number of pages the page scrubber thread clears at once.
#define SCRUB_SIZE 16

// Maximum I/O priority of the page writer.
#define MAX_PAGE_WRITER_IO_PRIORITY				B_URGENT_DISPLAY_PRIORITY

// Number of pages waiting to be written at which the page writer shall reach
// its maximum I/O priority.
#define MAX_PAGE_WRITER_IO_PRIORITY_THRESHOLD	10000
7451f837aeSIngo Weinhold
75aacb158cSIngo Weinhold
76cff6e9e4SIngo Weinhold// The page reserve an allocation of the certain priority must not touch.
77cff6e9e4SIngo Weinholdstatic const size_t kPageReserveForPriority[] = {
78cff6e9e4SIngo Weinhold	VM_PAGE_RESERVE_USER,		// user
79cff6e9e4SIngo Weinhold	VM_PAGE_RESERVE_SYSTEM,		// system
80cff6e9e4SIngo Weinhold	0							// VIP
81cff6e9e4SIngo Weinhold};
82cff6e9e4SIngo Weinhold
8340bb9481SIngo Weinhold// Minimum number of free pages the page daemon will try to achieve.
8440bb9481SIngo Weinholdstatic uint32 sFreePagesTarget;
8540bb9481SIngo Weinholdstatic uint32 sFreeOrCachedPagesTarget;
8640bb9481SIngo Weinholdstatic uint32 sInactivePagesTarget;
87cff6e9e4SIngo Weinhold
8840bb9481SIngo Weinhold// Wait interval between page daemon runs.
8940bb9481SIngo Weinholdstatic const bigtime_t kIdleScanWaitInterval = 1000000LL;	// 1 sec
9040bb9481SIngo Weinholdstatic const bigtime_t kBusyScanWaitInterval = 500000LL;	// 0.5 sec
9140bb9481SIngo Weinhold
9240bb9481SIngo Weinhold// Number of idle runs after which we want to have processed the full active
9340bb9481SIngo Weinhold// queue.
9440bb9481SIngo Weinholdstatic const uint32 kIdleRunsForFullQueue = 20;
9540bb9481SIngo Weinhold
9640bb9481SIngo Weinhold// Maximum limit for the vm_page::usage_count.
9740bb9481SIngo Weinholdstatic const int32 kPageUsageMax = 64;
9840bb9481SIngo Weinhold// vm_page::usage_count buff an accessed page receives in a scan.
9940bb9481SIngo Weinholdstatic const int32 kPageUsageAdvance = 3;
10040bb9481SIngo Weinhold// vm_page::usage_count debuff an unaccessed page receives in a scan.
10140bb9481SIngo Weinholdstatic const int32 kPageUsageDecline = 1;
102cff6e9e4SIngo Weinhold
1038d12bd13SIngo Weinholdint32 gMappedPagesCount;
1048d12bd13SIngo Weinhold
105e65c4002SIngo Weinholdstatic VMPageQueue sPageQueues[PAGE_STATE_COUNT];
106e65c4002SIngo Weinhold
107e65c4002SIngo Weinholdstatic VMPageQueue& sFreePageQueue = sPageQueues[PAGE_STATE_FREE];
108e65c4002SIngo Weinholdstatic VMPageQueue& sClearPageQueue = sPageQueues[PAGE_STATE_CLEAR];
109e65c4002SIngo Weinholdstatic VMPageQueue& sModifiedPageQueue = sPageQueues[PAGE_STATE_MODIFIED];
110e65c4002SIngo Weinholdstatic VMPageQueue& sInactivePageQueue = sPageQueues[PAGE_STATE_INACTIVE];
111e65c4002SIngo Weinholdstatic VMPageQueue& sActivePageQueue = sPageQueues[PAGE_STATE_ACTIVE];
112e65c4002SIngo Weinholdstatic VMPageQueue& sCachedPageQueue = sPageQueues[PAGE_STATE_CACHED];
11352a38012Sejakowatz
114db823da5SAxel Dörflerstatic vm_page *sPages;
115147133b7SIngo Weinholdstatic page_num_t sPhysicalPageOffset;
116147133b7SIngo Weinholdstatic page_num_t sNumPages;
11703732070SIngo Weinholdstatic page_num_t sNonExistingPages;
11803732070SIngo Weinhold	// pages in the sPages array that aren't backed by physical memory
11903732070SIngo Weinholdstatic uint64 sIgnoredPages;
12003732070SIngo Weinhold	// pages of physical memory ignored by the boot loader (and thus not
12103732070SIngo Weinhold	// available here)
12273ad2473SPawel Dziepakstatic int32 sUnreservedFreePages;
12373ad2473SPawel Dziepakstatic int32 sUnsatisfiedPageReservations;
12473ad2473SPawel Dziepakstatic int32 sModifiedTemporaryPages;
12552a38012Sejakowatz
1266cef245eSIngo Weinholdstatic ConditionVariable sFreePageCondition;
1273cd20943SIngo Weinholdstatic mutex sPageDeficitLock = MUTEX_INITIALIZER("page deficit");
12852a38012Sejakowatz
129c5e2c3ecSAxel Dörfler// This lock must be used whenever the free or clear page queues are changed.
130c5e2c3ecSAxel Dörfler// If you need to work on both queues at the same time, you need to hold a write
131c5e2c3ecSAxel Dörfler// lock, otherwise, a read lock suffices (each queue still has a spinlock to
132c5e2c3ecSAxel Dörfler// guard against concurrent changes).
1337372a88aSIngo Weinholdstatic rw_lock sFreePageQueuesLock
1347372a88aSIngo Weinhold	= RW_LOCK_INITIALIZER("free/clear page queues");
1357372a88aSIngo Weinhold
#if defined(TRACK_PAGE_USAGE_STATS)
// Backing storage for two page usage tables of 256 entries each: sPageUsage
// initially points to the first half, sNextPageUsage to the second half. Each
// table has an associated page count.
static page_num_t sPageUsageArrays[512];

static page_num_t* sPageUsage = &sPageUsageArrays[0];
static page_num_t sPageUsagePageCount;

static page_num_t* sNextPageUsage = &sPageUsageArrays[256];
static page_num_t sNextPageUsagePageCount;
#endif	// TRACK_PAGE_USAGE_STATS
14340bb9481SIngo Weinhold
14440bb9481SIngo Weinhold
#if VM_PAGE_ALLOCATION_TRACKING_AVAILABLE

// Entry of the caller info table: a caller address and a count associated
// with it.
struct caller_info {
	addr_t		caller;
	size_t		count;
};

// Fixed-size table of caller_info entries; sCallerInfoCount starts at 0
// (presumably the number of entries in use -- confirm against
// get_caller_info(), which is defined outside this section).
static const int32 kCallerInfoTableSize = 1024;
static caller_info sCallerInfoTable[kCallerInfoTableSize];
static int32 sCallerInfoCount = 0;

static caller_info* get_caller_info(addr_t caller);


// Prototypes of the range marker functions for "vm_page" and the address
// range table built from them.
RANGE_MARKER_FUNCTION_PROTOTYPES(vm_page)

static const addr_t kVMPageCodeAddressRange[] = {
	RANGE_MARKER_FUNCTION_ADDRESS_RANGE(vm_page)
};

#endif	// VM_PAGE_ALLOCATION_TRACKING_AVAILABLE
16690c6930eSMichael Lotz
16790c6930eSMichael Lotz
16890c6930eSMichael LotzRANGE_MARKER_FUNCTION_BEGIN(vm_page)
16990c6930eSMichael Lotz
17090c6930eSMichael Lotz
171fa47a7c4SIngo Weinholdstruct page_stats {
172fa47a7c4SIngo Weinhold	int32	totalFreePages;
173fa47a7c4SIngo Weinhold	int32	unsatisfiedReservations;
174fa47a7c4SIngo Weinhold	int32	cachedPages;
175fa47a7c4SIngo Weinhold};
176fa47a7c4SIngo Weinhold
177fa47a7c4SIngo Weinhold
17840bb9481SIngo Weinholdstruct PageReservationWaiter
17940bb9481SIngo Weinhold		: public DoublyLinkedListLinkImpl<PageReservationWaiter> {
1804535495dSIngo Weinhold	Thread*	thread;
1814535495dSIngo Weinhold	uint32	dontTouch;		// reserve not to touch
1824535495dSIngo Weinhold	uint32	missing;		// pages missing for the reservation
1834535495dSIngo Weinhold	int32	threadPriority;
18440bb9481SIngo Weinhold
18540bb9481SIngo Weinhold	bool operator<(const PageReservationWaiter& other) const
18640bb9481SIngo Weinhold	{
18740bb9481SIngo Weinhold		// Implies an order by descending VM priority (ascending dontTouch)
18840bb9481SIngo Weinhold		// and (secondarily) descending thread priority.
18940bb9481SIngo Weinhold		if (dontTouch != other.dontTouch)
19040bb9481SIngo Weinhold			return dontTouch < other.dontTouch;
19140bb9481SIngo Weinhold		return threadPriority > other.threadPriority;
19240bb9481SIngo Weinhold	}
19340bb9481SIngo Weinhold};
19440bb9481SIngo Weinhold
19540bb9481SIngo Weinholdtypedef DoublyLinkedList<PageReservationWaiter> PageReservationWaiterList;
19640bb9481SIngo Weinholdstatic PageReservationWaiterList sPageReservationWaiters;
19740bb9481SIngo Weinhold
19840bb9481SIngo Weinhold
19940bb9481SIngo Weinholdstruct DaemonCondition {
20040bb9481SIngo Weinhold	void Init(const char* name)
20140bb9481SIngo Weinhold	{
20240bb9481SIngo Weinhold		mutex_init(&fLock, "daemon condition");
20340bb9481SIngo Weinhold		fCondition.Init(this, name);
20440bb9481SIngo Weinhold		fActivated = false;
20540bb9481SIngo Weinhold	}
20640bb9481SIngo Weinhold
20740bb9481SIngo Weinhold	bool Lock()
20840bb9481SIngo Weinhold	{
20940bb9481SIngo Weinhold		return mutex_lock(&fLock) == B_OK;
21040bb9481SIngo Weinhold	}
21140bb9481SIngo Weinhold
21240bb9481SIngo Weinhold	void Unlock()
21340bb9481SIngo Weinhold	{
21440bb9481SIngo Weinhold		mutex_unlock(&fLock);
21540bb9481SIngo Weinhold	}
21640bb9481SIngo Weinhold
21740bb9481SIngo Weinhold	bool Wait(bigtime_t timeout, bool clearActivated)
21840bb9481SIngo Weinhold	{
21940bb9481SIngo Weinhold		MutexLocker locker(fLock);
22040bb9481SIngo Weinhold		if (clearActivated)
22140bb9481SIngo Weinhold			fActivated = false;
22240bb9481SIngo Weinhold		else if (fActivated)
22340bb9481SIngo Weinhold			return true;
22440bb9481SIngo Weinhold
22540bb9481SIngo Weinhold		ConditionVariableEntry entry;
22640bb9481SIngo Weinhold		fCondition.Add(&entry);
22740bb9481SIngo Weinhold
22840bb9481SIngo Weinhold		locker.Unlock();
22940bb9481SIngo Weinhold
23040bb9481SIngo Weinhold		return entry.Wait(B_RELATIVE_TIMEOUT, timeout) == B_OK;
23140bb9481SIngo Weinhold	}
23240bb9481SIngo Weinhold
23340bb9481SIngo Weinhold	void WakeUp()
23440bb9481SIngo Weinhold	{
23540bb9481SIngo Weinhold		if (fActivated)
23640bb9481SIngo Weinhold			return;
23740bb9481SIngo Weinhold
23840bb9481SIngo Weinhold		MutexLocker locker(fLock);
239