/*
 * Copyright 2008-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


#include <vm/VMCache.h>

#include <stddef.h>
#include <stdlib.h>

#include <algorithm>

#include <arch/cpu.h>
#include <condition_variable.h>
#include <heap.h>
#include <int.h>
#include <kernel.h>
#include <slab/Slab.h>
#include <smp.h>
#include <tracing.h>
#include <util/AutoLock.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_priv.h>
#include <vm/vm_types.h>
#include <vm/VMAddressSpace.h>
#include <vm/VMArea.h>

// needed for the factory only
#include "VMAnonymousCache.h"
#include "VMAnonymousNoSwapCache.h"
#include "VMDeviceCache.h"
#include "VMNullCache.h"
#include "../cache/vnode_store.h"


//#define TRACE_VM_CACHE
#ifdef TRACE_VM_CACHE
#	define TRACE(x) dprintf x
#else
#	define TRACE(x) ;
#endif


#if DEBUG_CACHE_LIST
VMCache* gDebugCacheList;
#endif
static mutex sCacheListLock = MUTEX_INITIALIZER("global VMCache list");
	// The lock is also needed when the debug feature is disabled.

ObjectCache* gCacheRefObjectCache;
ObjectCache* gAnonymousCacheObjectCache;
ObjectCache* gAnonymousNoSwapCacheObjectCache;
ObjectCache* gVnodeCacheObjectCache;
ObjectCache* gDeviceCacheObjectCache;
ObjectCache* gNullCacheObjectCache;


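// One entry in the singly linked list of threads waiting for events on a
// cache page. WaitForPageEvents() pushes an on-stack waiter onto
// fPageEventWaiters and blocks; _NotifyPageEvents() unblocks and unlinks the
// matching waiters.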
struct VMCache::PageEventWaiter {
	Thread*				thread;
	PageEventWaiter*	next;
	vm_page*			page;
	uint32				events;
};


#if VM_CACHE_TRACING

namespace VMCacheTracing {

class VMCacheTraceEntry : public AbstractTraceEntry {
	public:
		VMCacheTraceEntry(VMCache* cache)
			:
			fCache(cache)
		{
#if VM_CACHE_TRACING_STACK_TRACE
			fStackTrace = capture_tracing_stack_trace(
				VM_CACHE_TRACING_STACK_TRACE, 0, true);
				// Don't capture userland stack trace to avoid potential
				// deadlocks.
#endif
		}

#if VM_CACHE_TRACING_STACK_TRACE
		virtual void DumpStackTrace(TraceOutput& out)
		{
			out.PrintStackTrace(fStackTrace);
		}
#endif

		VMCache* Cache() const
		{
			return fCache;
		}

	protected:
		VMCache*	fCache;
#if VM_CACHE_TRACING_STACK_TRACE
		tracing_stack_trace* fStackTrace;
#endif
};


class Create : public VMCacheTraceEntry {
	public:
		Create(VMCache* cache)
			:
			VMCacheTraceEntry(cache)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache create: -> cache: %p", fCache);
		}
};


class Delete : public VMCacheTraceEntry {
	public:
		Delete(VMCache* cache)
			:
			VMCacheTraceEntry(cache)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache delete: cache: %p", fCache);
		}
};


class SetMinimalCommitment : public VMCacheTraceEntry {
	public:
		SetMinimalCommitment(VMCache* cache, off_t commitment)
			:
			VMCacheTraceEntry(cache),
			fOldCommitment(cache->committed_size),
			fCommitment(commitment)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache set min commitment: cache: %p, "
				"commitment: %" B_PRIdOFF " -> %" B_PRIdOFF, fCache,
				fOldCommitment, fCommitment);
		}

	private:
		off_t	fOldCommitment;
		off_t	fCommitment;
};


class Resize : public VMCacheTraceEntry {
	public:
		Resize(VMCache* cache, off_t size)
			:
			VMCacheTraceEntry(cache),
			fOldSize(cache->virtual_end),
			fSize(size)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache resize: cache: %p, size: %" B_PRIdOFF " -> %"
				B_PRIdOFF, fCache, fOldSize, fSize);
		}

	private:
		off_t	fOldSize;
		off_t	fSize;
};


class AddConsumer : public VMCacheTraceEntry {
	public:
		AddConsumer(VMCache* cache, VMCache* consumer)
			:
			VMCacheTraceEntry(cache),
			fConsumer(consumer)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache add consumer: cache: %p, consumer: %p", fCache,
				fConsumer);
		}

		VMCache* Consumer() const
		{
			return fConsumer;
		}

	private:
		VMCache*	fConsumer;
};


class RemoveConsumer : public VMCacheTraceEntry {
	public:
		RemoveConsumer(VMCache* cache, VMCache* consumer)
			:
			VMCacheTraceEntry(cache),
			fConsumer(consumer)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache remove consumer: cache: %p, consumer: %p",
				fCache, fConsumer);
		}

	private:
		VMCache*	fConsumer;
};


class Merge : public VMCacheTraceEntry {
	public:
		Merge(VMCache* cache, VMCache* consumer)
			:
			VMCacheTraceEntry(cache),
			fConsumer(consumer)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache merge with consumer: cache: %p, consumer: %p",
				fCache, fConsumer);
		}

	private:
		VMCache*	fConsumer;
};


class InsertArea : public VMCacheTraceEntry {
	public:
		InsertArea(VMCache* cache, VMArea* area)
			:
			VMCacheTraceEntry(cache),
			fArea(area)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache insert area: cache: %p, area: %p", fCache,
				fArea);
		}

		VMArea*	Area() const
		{
			return fArea;
		}

	private:
		VMArea*	fArea;
};


class RemoveArea : public VMCacheTraceEntry {
	public:
		RemoveArea(VMCache* cache, VMArea* area)
			:
			VMCacheTraceEntry(cache),
			fArea(area)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache remove area: cache: %p, area: %p", fCache,
				fArea);
		}

	private:
		VMArea*	fArea;
};

}	// namespace VMCacheTracing

#	define T(x) new(std::nothrow) VMCacheTracing::x;

#	if VM_CACHE_TRACING >= 2

namespace VMCacheTracing {

class InsertPage : public VMCacheTraceEntry {
	public:
		InsertPage(VMCache* cache, vm_page* page, off_t offset)
			:
			VMCacheTraceEntry(cache),
			fPage(page),
			fOffset(offset)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache insert page: cache: %p, page: %p, offset: %"
				B_PRIdOFF, fCache, fPage, fOffset);
		}

	private:
		vm_page*	fPage;
		off_t		fOffset;
};


class RemovePage : public VMCacheTraceEntry {
	public:
		RemovePage(VMCache* cache, vm_page* page)
			:
			VMCacheTraceEntry(cache),
			fPage(page)
		{
			Initialized();
		}

		virtual void AddDump(TraceOutput& out)
		{
			out.Print("vm cache remove page: cache: %p, page: %p", fCache,
				fPage);
		}

	private:
		vm_page*	fPage;
};

}	// namespace VMCacheTracing

#		define T2(x) new(std::nothrow) VMCacheTracing::x;
#	else
#		define T2(x) ;
#	endif
#else
#	define T(x) ;
#	define T2(x) ;
#endif


//	#pragma mark - debugger commands


#if VM_CACHE_TRACING


static void*
cache_stack_find_area_cache(const TraceEntryIterator& baseIterator, void* area)
{
	using namespace VMCacheTracing;

	// find the previous "insert area" entry for the given area
	TraceEntryIterator iterator = baseIterator;
	TraceEntry* entry = iterator.Current();
	while (entry != NULL) {
		if (InsertArea* insertAreaEntry = dynamic_cast<InsertArea*>(entry)) {
			if (insertAreaEntry->Area() == area)
				return insertAreaEntry->Cache();
		}

		entry = iterator.Previous();
	}

	return NULL;
}


static void*
cache_stack_find_consumer(const TraceEntryIterator& baseIterator, void* cache)
{
	using namespace VMCacheTracing;

	// find the previous "add consumer" or "create" entry for the given cache
	TraceEntryIterator iterator = baseIterator;
	TraceEntry* entry = iterator.Current();
	while (entry != NULL) {
		if (Create* createEntry = dynamic_cast<Create*>(entry)) {
			if (createEntry->Cache() == cache)
				return NULL;
		} else if (AddConsumer* addEntry = dynamic_cast<AddConsumer*>(entry)) {
			if (addEntry->Consumer() == cache)
				return addEntry->Cache();
		}

		entry = iterator.Previous();
	}

	return NULL;
}


static int
command_cache_stack(int argc, char** argv)
{
	if (argc < 3 || argc > 4) {
		print_debugger_command_usage(argv[0]);
		return 0;
	}

	bool isArea = false;

	int argi = 1;
	if (argc == 4) {
		if (strcmp(argv[argi], "area") != 0) {
			print_debugger_command_usage(argv[0]);
			return 0;
		}

		argi++;
		isArea = true;
	}

	uint64 addressValue;
	uint64 debugEntryIndex;
	if (!evaluate_debug_expression(argv[argi++], &addressValue, false)
		|| !evaluate_debug_expression(argv[argi++], &debugEntryIndex, false)) {
		return 0;
	}

	TraceEntryIterator baseIterator;
	if (baseIterator.MoveTo((int32)debugEntryIndex) == NULL) {
		kprintf("Invalid tracing entry index %" B_PRIu64 "\n", debugEntryIndex);
		return 0;
	}

	void* address = (void*)(addr_t)addressValue;

	kprintf("cache stack for %s %p at %" B_PRIu64 ":\n",
		isArea ? "area" : "cache", address, debugEntryIndex);
	if (isArea) {
		address = cache_stack_find_area_cache(baseIterator, address);
		if (address == NULL) {
			kprintf("  cache not found\n");
			return 0;
		}
	}

	while (address != NULL) {
		kprintf("  %p\n", address);
		address = cache_stack_find_consumer(baseIterator, address);
	}

	return 0;
}


#endif	// VM_CACHE_TRACING


//	#pragma mark -


status_t
vm_cache_init(kernel_args* args)
{
	// Create object caches for the structures we allocate here.
	gCacheRefObjectCache = create_object_cache("cache refs", sizeof(VMCacheRef),
		0, NULL, NULL, NULL);
	gAnonymousCacheObjectCache = create_object_cache("anon caches",
		sizeof(VMAnonymousCache), 0, NULL, NULL, NULL);
	gAnonymousNoSwapCacheObjectCache = create_object_cache(
		"anon no-swap caches", sizeof(VMAnonymousNoSwapCache), 0, NULL, NULL,
		NULL);
	gVnodeCacheObjectCache = create_object_cache("vnode caches",
		sizeof(VMVnodeCache), 0, NULL, NULL, NULL);
	gDeviceCacheObjectCache = create_object_cache("device caches",
		sizeof(VMDeviceCache), 0, NULL, NULL, NULL);
	gNullCacheObjectCache = create_object_cache("null caches",
		sizeof(VMNullCache), 0, NULL, NULL, NULL);

	if (gCacheRefObjectCache == NULL || gAnonymousCacheObjectCache == NULL
		|| gAnonymousNoSwapCacheObjectCache == NULL
		|| gVnodeCacheObjectCache == NULL
		|| gDeviceCacheObjectCache == NULL
		|| gNullCacheObjectCache == NULL) {
		panic("vm_cache_init(): Failed to create object caches!");
		return B_NO_MEMORY;
	}

	return B_OK;
}


void
vm_cache_init_post_heap()
{
#if VM_CACHE_TRACING
	add_debugger_command_etc("cache_stack", &command_cache_stack,
		"List the ancestors (sources) of a VMCache at the time given by "
			"tracing entry index",
		"[ \"area\" ] <address> <tracing entry index>\n"
		"All ancestors (sources) of a given VMCache at the time given by the\n"
		"tracing entry index are listed. If \"area\" is given the supplied\n"
		"address is an area instead of a cache address. The listing will\n"
		"start with the area's cache at that point.\n",
		0);
#endif	// VM_CACHE_TRACING
}


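/*!	Acquires a reference to, and locks, the cache the given \a page currently
	belongs to, guarding against the page being moved to another cache in the
	meantime. If \a dontWait is \c true, only TryLock() is attempted and
	\c NULL is returned when the cache lock cannot be taken immediately.
	Returns \c NULL if the page does not (or no longer) belong to a cache.
	The caller gets both a reference and the lock; both must be released.
*/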
VMCache*
vm_cache_acquire_locked_page_cache(vm_page* page, bool dontWait)
{
	mutex_lock(&sCacheListLock);

	while (dontWait) {
		VMCacheRef* cacheRef = page->CacheRef();
		if (cacheRef == NULL) {
			mutex_unlock(&sCacheListLock);
			return NULL;
		}

		VMCache* cache = cacheRef->cache;
		if (!cache->TryLock()) {
			mutex_unlock(&sCacheListLock);
			return NULL;
		}

		if (cacheRef == page->CacheRef()) {
			mutex_unlock(&sCacheListLock);
			cache->AcquireRefLocked();
			return cache;
		}

		// the cache changed in the meantime
		cache->Unlock();
	}

	while (true) {
		VMCacheRef* cacheRef = page->CacheRef();
		if (cacheRef == NULL) {
			mutex_unlock(&sCacheListLock);
			return NULL;
		}

		VMCache* cache = cacheRef->cache;
		if (!cache->SwitchLock(&sCacheListLock)) {
			// cache has been deleted
			mutex_lock(&sCacheListLock);
			continue;
		}

		mutex_lock(&sCacheListLock);
		if (cache == page->Cache()) {
			mutex_unlock(&sCacheListLock);
			cache->AcquireRefLocked();
			return cache;
		}

		// the cache changed in the meantime
		cache->Unlock();
	}
}


// #pragma mark - VMCacheRef


VMCacheRef::VMCacheRef(VMCache* cache)
	:
	cache(cache),
	ref_count(1)
{
}


// #pragma mark - VMCache

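/*!	Returns whether this cache can be merged with its single consumer: it
	must not have any areas, must be temporary, and must have exactly one
	consumer.
*/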
bool
VMCache::_IsMergeable() const
{
	return areas == NULL && temporary && !consumers.IsEmpty()
		&& consumers.Head() == consumers.Tail();
}


VMCache::VMCache()
	:
	fCacheRef(NULL)
{
}


VMCache::~VMCache()
{
	object_cache_delete(gCacheRefObjectCache, fCacheRef);
}


status_t
VMCache::Init(uint32 cacheType, uint32 allocationFlags)
{
	mutex_init(&fLock, "VMCache");

	areas = NULL;
	fRefCount = 1;
	source = NULL;
	virtual_base = 0;
	virtual_end = 0;
	committed_size = 0;
	temporary = 0;
	page_count = 0;
	fWiredPagesCount = 0;
	type = cacheType;
	fPageEventWaiters = NULL;

#if DEBUG_CACHE_LIST
	debug_previous = NULL;
	debug_next = NULL;
		// initialize in case the following fails
#endif

	fCacheRef = new(gCacheRefObjectCache, allocationFlags) VMCacheRef(this);
	if (fCacheRef == NULL)
		return B_NO_MEMORY;

#if DEBUG_CACHE_LIST
	mutex_lock(&sCacheListLock);

	if (gDebugCacheList != NULL)
		gDebugCacheList->debug_previous = this;
	debug_next = gDebugCacheList;
	gDebugCacheList = this;

	mutex_unlock(&sCacheListLock);
#endif

	return B_OK;
}


void
VMCache::Delete()
{
	if (areas != NULL)
		panic("cache %p to be deleted still has areas", this);
	if (!consumers.IsEmpty())
		panic("cache %p to be deleted still has consumers", this);

	T(Delete(this));

	// free all of the pages in the cache
	while (vm_page* page = pages.Root()) {
		if (!page->mappings.IsEmpty() || page->WiredCount() != 0) {
			panic("remove page %p from cache %p: page still has mappings!\n"
				"@!page %p; cache %p", page, this, page, this);
		}

		// remove it
		pages.Remove(page);
		page->SetCacheRef(NULL);

		TRACE(("vm_cache_release_ref: freeing page 0x%lx\n",
			page->physical_page_number));
		DEBUG_PAGE_ACCESS_START(page);
		vm_page_free(this, page);
	}

	// remove the ref to the source
	if (source)
		source->_RemoveConsumer(this);

	// We lock and unlock the sCacheListLock, even if the DEBUG_CACHE_LIST is
	// not enabled. This synchronization point is needed for
	// vm_cache_acquire_locked_page_cache().
	mutex_lock(&sCacheListLock);

#if DEBUG_CACHE_LIST
	if (debug_previous)
		debug_previous->debug_next = debug_next;
	if (debug_next)
		debug_next->debug_previous = debug_previous;
	if (this == gDebugCacheList)
		gDebugCacheList = debug_next;
#endif

	mutex_destroy(&fLock);

	mutex_unlock(&sCacheListLock);

	DeleteObject();
}


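/*!	Unlocks the cache, first merging it with its only consumer if it has
	become mergeable (see _IsMergeable()) and this is the last reference.
	If the reference count has dropped to zero, the cache is deleted.
	\param consumerLocked Whether the caller already holds the consumer's
		lock, in which case no attempt is made to lock it here.
*/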
void
VMCache::Unlock(bool consumerLocked)
{
	while (fRefCount == 1 && _IsMergeable()) {
		VMCache* consumer = consumers.Head();
		if (consumerLocked) {
			_MergeWithOnlyConsumer();
		} else if (consumer->TryLock()) {
			_MergeWithOnlyConsumer();
			consumer->Unlock();
		} else {
			// Someone else has locked the consumer ATM. Unlock this cache and
			// wait for the consumer lock. Increment the cache's ref count
			// temporarily, so that no one else will try what we are doing or
			// delete the cache.
			fRefCount++;
			bool consumerLockedTemp = consumer->SwitchLock(&fLock);
			Lock();
			fRefCount--;

			if (consumerLockedTemp) {
				if (fRefCount == 1 && _IsMergeable()
						&& consumer == consumers.Head()) {
					// nothing has changed in the meantime -- merge
					_MergeWithOnlyConsumer();
				}

				consumer->Unlock();
			}
		}
	}

	if (fRefCount == 0) {
		// delete this cache
		Delete();
	} else
		mutex_unlock(&fLock);
}


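/*!	Looks up the page at the given byte \a offset in this cache and returns
	it, or \c NULL if there is none. The cache must be locked.
*/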
vm_page*
VMCache::LookupPage(off_t offset)
{
	AssertLocked();

	vm_page* page = pages.Lookup((page_num_t)(offset >> PAGE_SHIFT));

#if KDEBUG
	if (page != NULL && page->Cache() != this)
		panic("page %p not in cache %p\n", page, this);
#endif

	return page;
}


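/*!	Inserts \a page at the given byte \a offset into this cache.
	The page must not belong to a cache yet; the cache must be locked.
*/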
void
VMCache::InsertPage(vm_page* page, off_t offset)
{
	TRACE(("VMCache::InsertPage(): cache %p, page %p, offset %" B_PRIdOFF "\n",
		this, page, offset));
	AssertLocked();

	if (page->CacheRef() != NULL) {
		panic("insert page %p into cache %p: page cache is set to %p\n",
			page, this, page->Cache());
	}

	T2(InsertPage(this, page, offset));

	page->cache_offset = (page_num_t)(offset >> PAGE_SHIFT);
	page_count++;
	page->SetCacheRef(fCacheRef);

#if KDEBUG
	vm_page* otherPage = pages.Lookup(page->cache_offset);
	if (otherPage != NULL) {
		panic("VMCache::InsertPage(): there's already page %p with cache "
			"offset %" B_PRIuPHYSADDR " in cache %p; inserting page %p",
			otherPage, page->cache_offset, this, page);
	}
#endif	// KDEBUG

	pages.Insert(page);

	if (page->WiredCount() > 0)
		IncrementWiredPagesCount();
}


/*!	Removes the vm_page from this cache. Of course, the page must
	really be in this cache or evil things will happen.
	The cache lock must be held.
*/
void
VMCache::RemovePage(vm_page* page)
{
	TRACE(("VMCache::RemovePage(): cache %p, page %p\n", this, page));
	AssertLocked();

	if (page->Cache() != this) {
		panic("remove page %p from cache %p: page cache is set to %p\n", page,
			this, page->Cache());
	}

	T2(RemovePage(this, page));

	pages.Remove(page);
	page_count--;
	page->SetCacheRef(NULL);

	if (page->WiredCount() > 0)
		DecrementWiredPagesCount();
}


/*!	Moves the given page from its current cache and inserts it into this
	cache. Both caches must be locked.
*/
void
VMCache::MovePage(vm_page* page)
{
	VMCache* oldCache = page->Cache();

	AssertLocked();
	oldCache->AssertLocked();

	// remove from old cache
	oldCache->pages.Remove(page);
	oldCache->page_count--;
	T2(RemovePage(oldCache, page));

	// insert here
	pages.Insert(page);
	page_count++;
	page->SetCacheRef(fCacheRef);

	if (page->WiredCount() > 0) {
		IncrementWiredPagesCount();
		oldCache->DecrementWiredPagesCount();
	}

	T2(InsertPage(this, page, page->cache_offset << PAGE_SHIFT));
}


/*!	Moves all pages from the given cache to this one.
	Both caches must be locked. This cache must be empty.
*/
void
VMCache::MoveAllPages(VMCache* fromCache)
{
	AssertLocked();
	fromCache->AssertLocked();
	ASSERT(page_count == 0);

	std::swap(fromCache->pages, pages);
	page_count = fromCache->page_count;
	fromCache->page_count = 0;
	fWiredPagesCount = fromCache->fWiredPagesCount;
	fromCache->fWiredPagesCount = 0;

	// swap the VMCacheRefs
	mutex_lock(&sCacheListLock);
	std::swap(fCacheRef, fromCache->fCacheRef);
	fCacheRef->cache = this;
	fromCache->fCacheRef->cache = fromCache;
	mutex_unlock(&sCacheListLock);

#if VM_CACHE_TRACING >= 2
	for (VMCachePagesTree::Iterator it = pages.GetIterator();
			vm_page* page = it.Next();) {
		T2(RemovePage(fromCache, page));
		T2(InsertPage(this, page, page->cache_offset << PAGE_SHIFT));
	}
#endif
}


/*!	Waits until one or more events happened for a given page which belongs to
	this cache.
	The cache must be locked. It will be unlocked by the method. \a relock
	specifies whether the method shall re-lock the cache before returning.
	\param page The page for which to wait.
	\param events The mask of events the caller is interested in.
	\param relock If \c true, the cache will be locked when returning,
		otherwise it won't be locked.
*/
void
VMCache::WaitForPageEvents(vm_page* page, uint32 events, bool relock)
{
	PageEventWaiter waiter;
	waiter.thread = thread_get_current_thread();
	waiter.next = fPageEventWaiters;
	waiter.page = page;
	waiter.events = events;

	fPageEventWaiters = &waiter;

	thread_prepare_to_block(waiter.thread, 0, THREAD_BLOCK_TYPE_OTHER,
		"cache page events");

	Unlock();
	thread_block();

	if (relock)
		Lock();
}


/*!	Makes this cache the source of the \a consumer cache,
	and adds the \a consumer to its list.
	This also grabs a reference to the source cache.
	Assumes you have the cache and the consumer's lock held.
*/
void
VMCache::AddConsumer(VMCache* consumer)
{
	TRACE(("add consumer vm cache %p to cache %p\n", consumer, this));
	AssertLocked();
	consumer->AssertLocked();

	T(AddConsumer(this, consumer));

	consumer->source = this;
	consumers.Add(consumer);

	AcquireRefLocked();
	AcquireStoreRef();
}


/*!	Adds the \a area to this cache.
	Assumes you have the cache locked.
*/
status_t
VMCache::InsertAreaLocked(VMArea* area)
{
	TRACE(("VMCache::InsertAreaLocked(cache %p, area %p)\n", this, area));
	AssertLocked();

	T(InsertArea(this, area));

	area->cache_next = areas;
	if (area->cache_next)
		area->cache_next->cache_prev = area;
	area->cache_prev = NULL;
	areas = area;

	AcquireStoreRef();

	return B_OK;
}


status_t
VMCache::RemoveArea(VMArea* area)
{
	TRACE(("VMCache::RemoveArea(cache %p, area %p)\n", this, area));

	T(RemoveArea(this, area));

	// We release the store reference first, since otherwise we would reverse
	// the locking order or even deadlock ourselves (... -> free_vnode() -> ...
	// -> bfs_remove_vnode() -> ... -> file_cache_set_size() -> mutex_lock()).
	// Also cf. _RemoveConsumer().
	ReleaseStoreRef();

	AutoLocker<VMCache> locker(this);

	if (area->cache_prev)
		area->cache_prev->cache_next = area->cache_next;
	if (area->cache_next)
		area->cache_next->cache_prev = area->cache_prev;
	if (areas == area)
		areas = area->cache_next;

	return B_OK;
}


/*!	Transfers the areas from \a fromCache to this cache. This cache must not
	have areas yet. Both caches must be locked.
*/
void
VMCache::TransferAreas(VMCache* fromCache)
{
	AssertLocked();
	fromCache->AssertLocked();
	ASSERT(areas == NULL);

	areas = fromCache->areas;
	fromCache->areas = NULL;

	for (VMArea* area = areas; area != NULL; area = area->cache_next) {
		area->cache = this;
		AcquireRefLocked();
		fromCache->ReleaseRefLocked();

		T(RemoveArea(fromCache, area));
		T(InsertArea(this, area));
	}
}


uint32
VMCache::CountWritableAreas(VMArea* ignoreArea) const
{
	uint32 count = 0;

	for (VMArea* area = areas; area != NULL; area = area->cache_next) {
		if (area != ignoreArea
			&& (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0) {
			count++;
		}
	}

	return count;
}


status_t
VMCache::WriteModified()
{
	TRACE(("VMCache::WriteModified(cache = %p)\n", this));

	if (temporary)
		return B_OK;

	Lock();
	status_t status = vm_page_write_modified_pages(this);
	Unlock();

	return status;
}


/*!	Commits the memory to the store if the \a commitment is larger than
	what's committed already.
	Assumes you have the cache's lock held.
*/
status_t
VMCache::SetMinimalCommitment(off_t commitment, int priority)
{
	TRACE(("VMCache::SetMinimalCommitment(cache %p, commitment %" B_PRIdOFF
		")\n", this, commitment));
	AssertLocked();

	T(SetMinimalCommitment(this, commitment));

	status_t status = B_OK;

	// If we don't have enough committed space to cover through to the new end
	// of the area...
	if (committed_size < commitment) {
		// ToDo: should we check if the cache's virtual size is large
		//	enough for a commitment of that size?

		// try to commit more memory
		status = Commit(commitment, priority);
	}

	return status;
}



/*!	This function updates the size field of the cache.
	If needed, it will free up all pages that don't belong to the cache anymore.
	The cache lock must be held when you call it.
	Since removed pages don't belong to the cache any longer, they are not
	written back before they are removed.

	Note, this function may temporarily release the cache lock in case it
	has to wait for busy pages.
*/
status_t
VMCache::Resize(off_t newSize, int priority)
{
	TRACE(("VMCache::Resize(cache %p, newSize %" B_PRIdOFF ") old size %"
		B_PRIdOFF "\n", this, newSize, this->virtual_end));
	this->AssertLocked();

	T(Resize(this, newSize));

	status_t status = Commit(newSize - virtual_base, priority);
	if (status != B_OK)
		return status;

	uint32 oldPageCount = (uint32)((virtual_end + B_PAGE_SIZE - 1)
		>> PAGE_SHIFT);
	uint32 newPageCount = (uint32)((newSize + B_PAGE_SIZE - 1) >> PAGE_SHIFT);

	if (newPageCount < oldPageCount) {
		// we need to remove all pages in the cache outside of the new virtual
		// size
		for (VMCachePagesTree::Iterator it
					= pages.GetIterator(newPageCount, true, true);
				vm_page* page = it.Next();) {
			if (page->busy) {
				if (page->busy_writing) {
					// We cannot wait for the page to become available
					// as we might cause a deadlock this way
					page->busy_writing = false;
						// this will notify the writer to free the page
				} else {
					// wait for page to become unbusy
					WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);

					// restart from the start of the list
					it = pages.GetIterator(newPageCount, true, true);
				}
				continue;
			}

			// remove the page and put it into the free queue
			DEBUG_PAGE_ACCESS_START(page);
			vm_remove_all_page_mappings(page);
			ASSERT(page->WiredCount() == 0);
				// TODO: Find a real solution! If the page is wired
				// temporarily (e.g. by lock_memory()), we actually must not
				// unmap it!
			RemovePage(page);
			vm_page_free(this, page);
				// Note: When iterating through an IteratableSplayTree
				// removing the current node is safe.
		}
	}

	virtual_end = newSize;
	return B_OK;
}


/*!	You have to call this function with the VMCache lock held. */
status_t
VMCache::FlushAndRemoveAllPages()
{
	ASSERT_LOCKED_MUTEX(&fLock);

	while (page_count > 0) {
		// write back modified pages
		status_t status = vm_page_write_modified_pages(this);
		if (status != B_OK)
			return status;

		// remove pages
		for (VMCachePagesTree::Iterator it = pages.GetIterator();
				vm_page* page = it.Next();) {
			if (page->busy) {
				// wait for page to become unbusy
				WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, true);

				// restart from the start of the list
				it = pages.GetIterator();
				continue;
			}

			// skip modified pages -- they will be written back in the next
			// iteration
			if (page->State() == PAGE_STATE_MODIFIED)
				continue;

			// We can't remove mapped pages.
			if (page->IsMapped())
				return B_BUSY;

			DEBUG_PAGE_ACCESS_START(page);
			RemovePage(page);
			vm_page_free(this, page);
				// Note: When iterating through an IteratableSplayTree
				// removing the current node is safe.
		}
	}

	return B_OK;
}


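/*!	Called when memory needs to be committed for this cache. The base class
	implementation only records the new commitment in \c committed_size;
	subclasses that actually back their pages (e.g. with swap space) override
	this to reserve memory accordingly.
*/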
status_t
VMCache::Commit(off_t size, int priority)
{
	committed_size = size;
	return B_OK;
}


/*!	Returns whether the cache's underlying backing store could deliver the
	page at the given offset.

	Basically it returns whether a Read() at \a offset would at least read a
	partial page (assuming that no unexpected errors occur or the situation
	changes in the meantime).
*/
bool
VMCache::HasPage(off_t offset)
{
	// In accordance with Fault() the default implementation doesn't have a
	// backing store and doesn't allow faults.
	return false;
}


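/*!	Reads from the cache's backing store into the given I/O vectors. The
	default implementation has no backing store and always fails with
	\c B_ERROR; the same holds for Write() below.
*/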
status_t
VMCache::Read(off_t offset, const generic_io_vec *vecs, size_t count,
	uint32 flags, generic_size_t *_numBytes)
{
	return B_ERROR;
}


status_t
VMCache::Write(off_t offset, const generic_io_vec *vecs, size_t count,
	uint32 flags, generic_size_t *_numBytes)
{
	return B_ERROR;
}


status_t
VMCache::WriteAsync(off_t offset, const generic_io_vec* vecs, size_t count,
	generic_size_t numBytes, uint32 flags, AsyncIOCallback* callback)
{
	// Not supported, fall back to the synchronous hook.
	generic_size_t transferred = numBytes;
	status_t error = Write(offset, vecs, count, flags, &transferred);

	if (callback != NULL)
		callback->IOFinished(error, transferred != numBytes, transferred);

	return error;
}


/*!	\brief Returns whether the cache can write the page at the given offset.

	The cache must be locked when this function is invoked.

	@param offset The page offset.
	@return \c true, if the page can be written, \c false otherwise.
*/
bool
VMCache::CanWritePage(off_t offset)
{
	return false;
}


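/*!	Fault hook for the cache's backing store. The default implementation has
	no backing store and denies the fault with \c B_BAD_ADDRESS (cf. the
	comment in HasPage()); caches with a backing store override it.
*/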
status_t
VMCache::Fault(struct VMAddressSpace *aspace, off_t offset)
{
	return B_BAD_ADDRESS;
}


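/*!	Merges the pages of \a source into this cache: every page of \a source
	that this cache does not already contain at the same offset is moved
	here. Both caches must be locked.
*/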
void
VMCache::Merge(VMCache* source)
{
	for (VMCachePagesTree::Iterator it = source->pages.GetIterator();
			vm_page* page = it.Next();) {
		// Note: Removing the current node while iterating through an
		// IteratableSplayTree is safe.
		vm_page* consumerPage = LookupPage(
			(off_t)page->cache_offset << PAGE_SHIFT);
		if (consumerPage == NULL) {
			// the page is not yet in the consumer cache - move it upwards
			MovePage(page);
		}
	}
}


status_t
VMCache::AcquireUnreferencedStoreRef()
{
	return B_OK;
}


void
VMCache::AcquireStoreRef()
{
}


void
VMCache::ReleaseStoreRef()
{
}


/*!	Kernel debugger version of HasPage().
	Does not do any locking.
*/
bool
VMCache::DebugHasPage(off_t offset)
{
	// default that works for all subclasses that don't lock anyway
	return HasPage(offset);
}


/*!	Kernel debugger version of LookupPage().
	Does not do any locking.
*/
vm_page*
VMCache::DebugLookupPage(off_t offset)
{
	return pages.Lookup((page_num_t)(offset >> PAGE_SHIFT));
}


void
VMCache::Dump(bool showPages) const
{
	kprintf("CACHE %p:\n", this);
	kprintf("  ref_count:    %" B_PRId32 "\n", RefCount());
	kprintf("  source:       %p\n", source);
	kprintf("  type:         %s\n", vm_cache_type_to_string(type));
	kprintf("  virtual_base: 0x%" B_PRIx64 "\n", virtual_base);
	kprintf("  virtual_end:  0x%" B_PRIx64 "\n", virtual_end);
	kprintf("  temporary:    %" B_PRIu32 "\n", temporary);
	kprintf("  lock:         %p\n", &fLock);
#if KDEBUG
	kprintf("  lock.holder:  %" B_PRId32 "\n", fLock.holder);
#endif
	kprintf("  areas:\n");

	for (VMArea* area = areas; area != NULL; area = area->cache_next) {
		kprintf("    area 0x%" B_PRIx32 ", %s\n", area->id, area->name);
		kprintf("\tbase_addr:  0x%lx, size: 0x%lx\n", area->Base(),
			area->Size());
		kprintf("\tprotection: 0x%" B_PRIx32 "\n", area->protection);
		kprintf("\towner:      0x%" B_PRIx32 "\n", area->address_space->ID());
	}

	kprintf("  consumers:\n");
	for (ConsumerList::ConstIterator it = consumers.GetIterator();
		 	VMCache* consumer = it.Next();) {
		kprintf("\t%p\n", consumer);
	}

	kprintf("  pages:\n");
	if (showPages) {
		for (VMCachePagesTree::ConstIterator it = pages.GetIterator();
				vm_page* page = it.Next();) {
			if (!vm_page_is_dummy(page)) {
				kprintf("\t%p ppn %#" B_PRIxPHYSADDR " offset %#" B_PRIxPHYSADDR
					" state %u (%s) wired_count %u\n", page,
					page->physical_page_number, page->cache_offset,
					page->State(), page_state_to_string(page->State()),
					page->WiredCount());
			} else {
				kprintf("\t%p DUMMY PAGE state %u (%s)\n",
					page, page->State(), page_state_to_string(page->State()));
			}
		}
	} else
		kprintf("\t%" B_PRIu32 " in cache\n", page_count);
}


/*!	Wakes up threads waiting for page events.
	\param page The page for which events occurred.
	\param events The mask of events that occurred.
*/
void
VMCache::_NotifyPageEvents(vm_page* page, uint32 events)
{
	PageEventWaiter** it = &fPageEventWaiters;
	while (PageEventWaiter* waiter = *it) {
		if (waiter->page == page && (waiter->events & events) != 0) {
			// remove from list and unblock
			*it = waiter->next;
			thread_unblock(waiter->thread, B_OK);
		} else
			it = &waiter->next;
	}
}


/*!	Merges the given cache with its only consumer.
	The caller must hold both the cache's and the consumer's lock. The method
	releases neither lock.
*/
void
VMCache::_MergeWithOnlyConsumer()
{
	VMCache* consumer = consumers.RemoveHead();

	TRACE(("merge vm cache %p (ref == %" B_PRId32 ") with vm cache %p\n",
		this, this->fRefCount, consumer));

	T(Merge(this, consumer));

	// merge the cache
	consumer->Merge(this);

	// The remaining consumer has got a new source.
	if (source != NULL) {
		VMCache* newSource = source;

		newSource->Lock();

		newSource->consumers.Remove(this);
		newSource->consumers.Add(consumer);
		consumer->source = newSource;
		source = NULL;

		newSource->Unlock();
	} else
		consumer->source = NULL;

	// Release the reference the cache's consumer owned. The consumer takes
	// over the cache's ref to its source (if any) instead.
	ReleaseRefLocked();
}


/*!	Removes the \a consumer from this cache.
	It will also release the reference to the cache owned by the consumer.
	Assumes you have the consumer's cache lock held. This cache must not be
	locked.
*/
void
VMCache::_RemoveConsumer(VMCache* consumer)
{
	TRACE(("remove consumer vm cache %p from cache %p\n", consumer, this));
	consumer->AssertLocked();

	T(RemoveConsumer(this, consumer));

	// Remove the store ref before locking the cache. Otherwise we'd call into
	// the VFS while holding the cache lock, which would reverse the usual
	// locking order.
	ReleaseStoreRef();

	// remove the consumer from the cache, but keep its reference until later
	Lock();
	consumers.Remove(consumer);
	consumer->source = NULL;

	ReleaseRefAndUnlock();
}


// #pragma mark - VMCacheFactory
	// TODO: Move to own source file!


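// A rough usage sketch (hypothetical caller, not taken from this file): an
// anonymous, non-swappable cache could be created roughly like this, assuming
// the caller later attaches areas/pages and handles the error path:
//
//	VMCache* cache;
//	status_t status = VMCacheFactory::CreateAnonymousCache(cache,
//		true /* canOvercommit */, 0, 0, false /* swappable */,
//		VM_PRIORITY_SYSTEM);
//	if (status == B_OK) {
//		cache->temporary = 1;
//		cache->Lock();
//		// ... insert pages / attach areas ...
//		cache->Unlock();
//	}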
/*static*/ status_t
VMCacheFactory::CreateAnonymousCache(VMCache*& _cache, bool canOvercommit,
	int32 numPrecommittedPages, int32 numGuardPages, bool swappable,
	int priority)
{
	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	if (priority >= VM_PRIORITY_VIP)
		allocationFlags |= HEAP_PRIORITY_VIP;

#if ENABLE_SWAP_SUPPORT
	if (swappable) {
		VMAnonymousCache* cache
			= new(gAnonymousCacheObjectCache, allocationFlags) VMAnonymousCache;
		if (cache == NULL)
			return B_NO_MEMORY;

		status_t error = cache->Init(canOvercommit, numPrecommittedPages,
			numGuardPages, allocationFlags);
		if (error != B_OK) {
			cache->Delete();
			return error;
		}

		T(Create(cache));

		_cache = cache;
		return B_OK;
	}
#endif

	VMAnonymousNoSwapCache* cache
		= new(gAnonymousNoSwapCacheObjectCache, allocationFlags)
			VMAnonymousNoSwapCache;
	if (cache == NULL)
		return B_NO_MEMORY;

	status_t error = cache->Init(canOvercommit, numPrecommittedPages,
		numGuardPages, allocationFlags);
	if (error != B_OK) {
		cache->Delete();
		return error;
	}

	T(Create(cache));

	_cache = cache;
	return B_OK;
}


/*static*/ status_t
VMCacheFactory::CreateVnodeCache(VMCache*& _cache, struct vnode* vnode)
{
	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
		// Note: Vnode cache creation is never VIP.

	VMVnodeCache* cache
		= new(gVnodeCacheObjectCache, allocationFlags) VMVnodeCache;
	if (cache == NULL)
		return B_NO_MEMORY;

	status_t error = cache->Init(vnode, allocationFlags);
	if (error != B_OK) {
		cache->Delete();
		return error;
	}

	T(Create(cache));

	_cache = cache;
	return B_OK;
}


/*static*/ status_t
VMCacheFactory::CreateDeviceCache(VMCache*& _cache, addr_t baseAddress)
{
	const uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
		// Note: Device cache creation is never VIP.

	VMDeviceCache* cache
		= new(gDeviceCacheObjectCache, allocationFlags) VMDeviceCache;
	if (cache == NULL)
		return B_NO_MEMORY;

	status_t error = cache->Init(baseAddress, allocationFlags);
	if (error != B_OK) {
		cache->Delete();
		return error;
	}

	T(Create(cache));

	_cache = cache;
	return B_OK;
}


/*static*/ status_t
VMCacheFactory::CreateNullCache(int priority, VMCache*& _cache)
{
	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
		| HEAP_DONT_LOCK_KERNEL_SPACE;
	if (priority >= VM_PRIORITY_VIP)
		allocationFlags |= HEAP_PRIORITY_VIP;

	VMNullCache* cache
		= new(gNullCacheObjectCache, allocationFlags) VMNullCache;
	if (cache == NULL)
		return B_NO_MEMORY;

	status_t error = cache->Init(allocationFlags);
	if (error != B_OK) {
		cache->Delete();
		return error;
	}

	T(Create(cache));

	_cache = cache;
	return B_OK;
}