1/*
2 * Copyright 2009-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4 * Distributed under the terms of the MIT License.
5 *
6 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7 * Distributed under the terms of the NewOS License.
8 */
9
10
11#include <vm/vm.h>
12
13#include <ctype.h>
14#include <stdlib.h>
15#include <stdio.h>
16#include <string.h>
17#include <sys/mman.h>
18
19#include <algorithm>
20
21#include <OS.h>
22#include <KernelExport.h>
23
24#include <AutoDeleter.h>
25
26#include <symbol_versioning.h>
27
28#include <arch/cpu.h>
29#include <arch/vm.h>
30#include <arch/user_memory.h>
31#include <boot/elf.h>
32#include <boot/stage2.h>
33#include <condition_variable.h>
34#include <console.h>
35#include <debug.h>
36#include <file_cache.h>
37#include <fs/fd.h>
38#include <heap.h>
39#include <kernel.h>
40#include <int.h>
41#include <lock.h>
42#include <low_resource_manager.h>
43#include <slab/Slab.h>
44#include <smp.h>
45#include <system_info.h>
46#include <thread.h>
47#include <team.h>
48#include <tracing.h>
49#include <util/AutoLock.h>
50#include <vm/vm_page.h>
51#include <vm/vm_priv.h>
52#include <vm/VMAddressSpace.h>
53#include <vm/VMArea.h>
54#include <vm/VMCache.h>
55
56#include "VMAddressSpaceLocking.h"
57#include "VMAnonymousCache.h"
58#include "VMAnonymousNoSwapCache.h"
59#include "IORequest.h"
60
61
62//#define TRACE_VM
63//#define TRACE_FAULTS
64#ifdef TRACE_VM
65#	define TRACE(x) dprintf x
66#else
67#	define TRACE(x) ;
68#endif
69#ifdef TRACE_FAULTS
70#	define FTRACE(x) dprintf x
71#else
72#	define FTRACE(x) ;
73#endif
74
75
76namespace {
77
78class AreaCacheLocking {
79public:
80	inline bool Lock(VMCache* lockable)
81	{
82		return false;
83	}
84
85	inline void Unlock(VMCache* lockable)
86	{
87		vm_area_put_locked_cache(lockable);
88	}
89};
90
91class AreaCacheLocker : public AutoLocker<VMCache, AreaCacheLocking> {
92public:
93	inline AreaCacheLocker(VMCache* cache = NULL)
94		: AutoLocker<VMCache, AreaCacheLocking>(cache, true)
95	{
96	}
97
98	inline AreaCacheLocker(VMArea* area)
99		: AutoLocker<VMCache, AreaCacheLocking>()
100	{
101		SetTo(area);
102	}
103
104	inline void SetTo(VMCache* cache, bool alreadyLocked)
105	{
106		AutoLocker<VMCache, AreaCacheLocking>::SetTo(cache, alreadyLocked);
107	}
108
109	inline void SetTo(VMArea* area)
110	{
111		return AutoLocker<VMCache, AreaCacheLocking>::SetTo(
112			area != NULL ? vm_area_get_locked_cache(area) : NULL, true, true);
113	}
114};
115
116
117class VMCacheChainLocker {
118public:
119	VMCacheChainLocker()
120		:
121		fTopCache(NULL),
122		fBottomCache(NULL)
123	{
124	}
125
126	VMCacheChainLocker(VMCache* topCache)
127		:
128		fTopCache(topCache),
129		fBottomCache(topCache)
130	{
131	}
132
133	~VMCacheChainLocker()
134	{
135		Unlock();
136	}
137
138	void SetTo(VMCache* topCache)
139	{
140		fTopCache = topCache;
141		fBottomCache = topCache;
142
143		if (topCache != NULL)
144			topCache->SetUserData(NULL);
145	}
146
147	VMCache* LockSourceCache()
148	{
149		if (fBottomCache == NULL || fBottomCache->source == NULL)
150			return NULL;
151
152		VMCache* previousCache = fBottomCache;
153
154		fBottomCache = fBottomCache->source;
155		fBottomCache->Lock();
156		fBottomCache->AcquireRefLocked();
157		fBottomCache->SetUserData(previousCache);
158
159		return fBottomCache;
160	}
161
162	void LockAllSourceCaches()
163	{
164		while (LockSourceCache() != NULL) {
165		}
166	}
167
168	void Unlock(VMCache* exceptCache = NULL)
169	{
170		if (fTopCache == NULL)
171			return;
172
173		// Unlock caches in source -> consumer direction. This is important to
174		// avoid double-locking and a reversal of locking order in case a cache
		// is eligible for merging.
176		VMCache* cache = fBottomCache;
177		while (cache != NULL) {
178			VMCache* nextCache = (VMCache*)cache->UserData();
179			if (cache != exceptCache)
180				cache->ReleaseRefAndUnlock(cache != fTopCache);
181
182			if (cache == fTopCache)
183				break;
184
185			cache = nextCache;
186		}
187
188		fTopCache = NULL;
189		fBottomCache = NULL;
190	}
191
192	void UnlockKeepRefs(bool keepTopCacheLocked)
193	{
194		if (fTopCache == NULL)
195			return;
196
197		VMCache* nextCache = fBottomCache;
198		VMCache* cache = NULL;
199
200		while (keepTopCacheLocked
201				? nextCache != fTopCache : cache != fTopCache) {
202			cache = nextCache;
203			nextCache = (VMCache*)cache->UserData();
204			cache->Unlock(cache != fTopCache);
205		}
206	}
207
208	void RelockCaches(bool topCacheLocked)
209	{
210		if (fTopCache == NULL)
211			return;
212
213		VMCache* nextCache = fTopCache;
214		VMCache* cache = NULL;
215		if (topCacheLocked) {
216			cache = nextCache;
217			nextCache = cache->source;
218		}
219
220		while (cache != fBottomCache && nextCache != NULL) {
221			VMCache* consumer = cache;
222			cache = nextCache;
223			nextCache = cache->source;
224			cache->Lock();
225			cache->SetUserData(consumer);
226		}
227	}
228
229private:
230	VMCache*	fTopCache;
231	VMCache*	fBottomCache;
232};
233
234} // namespace
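

// A usage sketch of the lockers above (illustrative only, not referenced
// anywhere else): cut_area() further below locks an area's top cache and then
// the whole chain of source caches roughly like this:
//
//	VMCache* cache = vm_area_get_locked_cache(area);
//	VMCacheChainLocker cacheChainLocker(cache);
//	cacheChainLocker.LockAllSourceCaches();
//	// ... work on the cache chain ...
//	// ~VMCacheChainLocker() unlocks the chain in source -> consumer order
//	// and drops the references it holds.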
235
236
// The memory reserve that an allocation of a given priority must not touch.
238static const size_t kMemoryReserveForPriority[] = {
239	VM_MEMORY_RESERVE_USER,		// user
240	VM_MEMORY_RESERVE_SYSTEM,	// system
241	0							// VIP
242};
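

// The table above is indexed by the VM_PRIORITY_* constants (user, system,
// VIP). A check along the following lines is presumably what
// vm_try_reserve_memory() performs against it (sketch only, assuming
// sAvailableMemoryLock is held and "amount" is the requested size):
//
//	if (sAvailableMemory - (off_t)amount
//			>= (off_t)kMemoryReserveForPriority[priority]) {
//		sAvailableMemory -= amount;
//		return B_OK;
//	}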
243
244
245ObjectCache* gPageMappingsObjectCache;
246
247static rw_lock sAreaCacheLock = RW_LOCK_INITIALIZER("area->cache");
248
249static off_t sAvailableMemory;
250static off_t sNeededMemory;
251static mutex sAvailableMemoryLock = MUTEX_INITIALIZER("available memory lock");
252static uint32 sPageFaults;
253
254static VMPhysicalPageMapper* sPhysicalPageMapper;
255
256#if DEBUG_CACHE_LIST
257
258struct cache_info {
259	VMCache*	cache;
260	addr_t		page_count;
261	addr_t		committed;
262};
263
264static const int kCacheInfoTableCount = 100 * 1024;
265static cache_info* sCacheInfoTable;
266
267#endif	// DEBUG_CACHE_LIST
268
269
270// function declarations
271static void delete_area(VMAddressSpace* addressSpace, VMArea* area,
272	bool addressSpaceCleanup);
273static status_t vm_soft_fault(VMAddressSpace* addressSpace, addr_t address,
274	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage);
275static status_t map_backing_store(VMAddressSpace* addressSpace,
276	VMCache* cache, off_t offset, const char* areaName, addr_t size, int wiring,
277	int protection, int mapping, uint32 flags,
278	const virtual_address_restrictions* addressRestrictions, bool kernel,
279	VMArea** _area, void** _virtualAddress);
280static void fix_protection(uint32* protection);
281
282
283//	#pragma mark -
284
285
286#if VM_PAGE_FAULT_TRACING
287
288namespace VMPageFaultTracing {
289
290class PageFaultStart : public AbstractTraceEntry {
291public:
292	PageFaultStart(addr_t address, bool write, bool user, addr_t pc)
293		:
294		fAddress(address),
295		fPC(pc),
296		fWrite(write),
297		fUser(user)
298	{
299		Initialized();
300	}
301
302	virtual void AddDump(TraceOutput& out)
303	{
304		out.Print("page fault %#lx %s %s, pc: %#lx", fAddress,
305			fWrite ? "write" : "read", fUser ? "user" : "kernel", fPC);
306	}
307
308private:
309	addr_t	fAddress;
310	addr_t	fPC;
311	bool	fWrite;
312	bool	fUser;
313};
314
315
316// page fault errors
317enum {
318	PAGE_FAULT_ERROR_NO_AREA		= 0,
319	PAGE_FAULT_ERROR_KERNEL_ONLY,
320	PAGE_FAULT_ERROR_WRITE_PROTECTED,
321	PAGE_FAULT_ERROR_READ_PROTECTED,
322	PAGE_FAULT_ERROR_EXECUTE_PROTECTED,
323	PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY,
324	PAGE_FAULT_ERROR_NO_ADDRESS_SPACE
325};
326
327
328class PageFaultError : public AbstractTraceEntry {
329public:
330	PageFaultError(area_id area, status_t error)
331		:
332		fArea(area),
333		fError(error)
334	{
335		Initialized();
336	}
337
338	virtual void AddDump(TraceOutput& out)
339	{
340		switch (fError) {
341			case PAGE_FAULT_ERROR_NO_AREA:
342				out.Print("page fault error: no area");
343				break;
344			case PAGE_FAULT_ERROR_KERNEL_ONLY:
345				out.Print("page fault error: area: %ld, kernel only", fArea);
346				break;
347			case PAGE_FAULT_ERROR_WRITE_PROTECTED:
348				out.Print("page fault error: area: %ld, write protected",
349					fArea);
350				break;
351			case PAGE_FAULT_ERROR_READ_PROTECTED:
352				out.Print("page fault error: area: %ld, read protected", fArea);
353				break;
354			case PAGE_FAULT_ERROR_EXECUTE_PROTECTED:
355				out.Print("page fault error: area: %ld, execute protected",
356					fArea);
357				break;
358			case PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY:
359				out.Print("page fault error: kernel touching bad user memory");
360				break;
361			case PAGE_FAULT_ERROR_NO_ADDRESS_SPACE:
362				out.Print("page fault error: no address space");
363				break;
364			default:
365				out.Print("page fault error: area: %ld, error: %s", fArea,
366					strerror(fError));
367				break;
368		}
369	}
370
371private:
372	area_id		fArea;
373	status_t	fError;
374};
375
376
377class PageFaultDone : public AbstractTraceEntry {
378public:
379	PageFaultDone(area_id area, VMCache* topCache, VMCache* cache,
380			vm_page* page)
381		:
382		fArea(area),
383		fTopCache(topCache),
384		fCache(cache),
385		fPage(page)
386	{
387		Initialized();
388	}
389
390	virtual void AddDump(TraceOutput& out)
391	{
392		out.Print("page fault done: area: %ld, top cache: %p, cache: %p, "
393			"page: %p", fArea, fTopCache, fCache, fPage);
394	}
395
396private:
397	area_id		fArea;
398	VMCache*	fTopCache;
399	VMCache*	fCache;
400	vm_page*	fPage;
401};
402
403}	// namespace VMPageFaultTracing
404
405#	define TPF(x) new(std::nothrow) VMPageFaultTracing::x;
406#else
407#	define TPF(x) ;
408#endif	// VM_PAGE_FAULT_TRACING
409
410
411//	#pragma mark -
412
413
414/*!	The page's cache must be locked.
415*/
416static inline void
417increment_page_wired_count(vm_page* page)
418{
419	if (!page->IsMapped())
420		atomic_add(&gMappedPagesCount, 1);
421	page->IncrementWiredCount();
422}
423
424
425/*!	The page's cache must be locked.
426*/
427static inline void
428decrement_page_wired_count(vm_page* page)
429{
430	page->DecrementWiredCount();
431	if (!page->IsMapped())
432		atomic_add(&gMappedPagesCount, -1);
433}
434
435
436static inline addr_t
437virtual_page_address(VMArea* area, vm_page* page)
438{
439	return area->Base()
440		+ ((page->cache_offset << PAGE_SHIFT) - area->cache_offset);
441}
442
443
444//! You need to have the address space locked when calling this function
445static VMArea*
446lookup_area(VMAddressSpace* addressSpace, area_id id)
447{
448	VMAreaHash::ReadLock();
449
450	VMArea* area = VMAreaHash::LookupLocked(id);
451	if (area != NULL && area->address_space != addressSpace)
452		area = NULL;
453
454	VMAreaHash::ReadUnlock();
455
456	return area;
457}
458
459
460static status_t
461allocate_area_page_protections(VMArea* area)
462{
463	// In the page protections we store only the three user protections,
464	// so we use 4 bits per page.
465	uint32 bytes = (area->Size() / B_PAGE_SIZE + 1) / 2;
466	area->page_protections = (uint8*)malloc_etc(bytes,
467		HEAP_DONT_LOCK_KERNEL_SPACE);
468	if (area->page_protections == NULL)
469		return B_NO_MEMORY;
470
471	// init the page protections for all pages to that of the area
472	uint32 areaProtection = area->protection
473		& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
474	memset(area->page_protections, areaProtection | (areaProtection << 4),
475		bytes);
476	return B_OK;
477}
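

// Worked example for the allocation size above: a 5-page area yields
// (5 + 1) / 2 = 3 bytes -- one nibble per page, two pages per byte, with the
// last nibble left unused when the page count is odd.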
478
479
480static inline void
481set_area_page_protection(VMArea* area, addr_t pageAddress, uint32 protection)
482{
483	protection &= B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA;
484	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
485	uint8& entry = area->page_protections[pageIndex / 2];
486	if (pageIndex % 2 == 0)
487		entry = (entry & 0xf0) | protection;
488	else
489		entry = (entry & 0x0f) | (protection << 4);
490}
491
492
493static inline uint32
494get_area_page_protection(VMArea* area, addr_t pageAddress)
495{
496	if (area->page_protections == NULL)
497		return area->protection;
498
499	uint32 pageIndex = (pageAddress - area->Base()) / B_PAGE_SIZE;
500	uint32 protection = area->page_protections[pageIndex / 2];
501	if (pageIndex % 2 == 0)
502		protection &= 0x0f;
503	else
504		protection >>= 4;
505
506	// If this is a kernel area we translate the user flags to kernel flags.
507	if (area->address_space == VMAddressSpace::Kernel()) {
508		uint32 kernelProtection = 0;
509		if ((protection & B_READ_AREA) != 0)
510			kernelProtection |= B_KERNEL_READ_AREA;
511		if ((protection & B_WRITE_AREA) != 0)
512			kernelProtection |= B_KERNEL_WRITE_AREA;
513
514		return kernelProtection;
515	}
516
517	return protection | B_KERNEL_READ_AREA
518		| (protection & B_WRITE_AREA ? B_KERNEL_WRITE_AREA : 0);
519}
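

// Packing sketch for the accessors above: page_protections[i] stores the
// nibble of page 2 * i in its low half and that of page 2 * i + 1 in its high
// half. Assuming the usual values B_READ_AREA == 0x1 and B_WRITE_AREA == 0x2,
// an entry of 0x31 means page 2 * i is read-only while page 2 * i + 1 is
// readable and writable:
//
//	uint8 entry = area->page_protections[pageIndex / 2];
//	uint32 protection
//		= (pageIndex % 2 == 0) ? (entry & 0x0f) : (entry >> 4);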
520
521
/*!	The caller must have reserved enough pages that the translation map
	implementation might need to map this page.
524	The page's cache must be locked.
525*/
526static status_t
527map_page(VMArea* area, vm_page* page, addr_t address, uint32 protection,
528	vm_page_reservation* reservation)
529{
530	VMTranslationMap* map = area->address_space->TranslationMap();
531
532	bool wasMapped = page->IsMapped();
533
534	if (area->wiring == B_NO_LOCK) {
535		DEBUG_PAGE_ACCESS_CHECK(page);
536
537		bool isKernelSpace = area->address_space == VMAddressSpace::Kernel();
538		vm_page_mapping* mapping = (vm_page_mapping*)object_cache_alloc(
539			gPageMappingsObjectCache,
540			CACHE_DONT_WAIT_FOR_MEMORY
541				| (isKernelSpace ? CACHE_DONT_LOCK_KERNEL_SPACE : 0));
542		if (mapping == NULL)
543			return B_NO_MEMORY;
544
545		mapping->page = page;
546		mapping->area = area;
547
548		map->Lock();
549
550		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
551			area->MemoryType(), reservation);
552
553		// insert mapping into lists
554		if (!page->IsMapped())
555			atomic_add(&gMappedPagesCount, 1);
556
557		page->mappings.Add(mapping);
558		area->mappings.Add(mapping);
559
560		map->Unlock();
561	} else {
562		DEBUG_PAGE_ACCESS_CHECK(page);
563
564		map->Lock();
565		map->Map(address, page->physical_page_number * B_PAGE_SIZE, protection,
566			area->MemoryType(), reservation);
567		map->Unlock();
568
569		increment_page_wired_count(page);
570	}
571
572	if (!wasMapped) {
		// The page is mapped now, so it must not remain in the cached queue.
		// It also makes sense to move it from the inactive to the active
		// queue, since otherwise the page daemon wouldn't come to keep track
		// of it (in idle mode) -- if the page isn't touched, it will be
		// deactivated after a full iteration through the queue at the latest.
578		if (page->State() == PAGE_STATE_CACHED
579				|| page->State() == PAGE_STATE_INACTIVE) {
580			vm_page_set_state(page, PAGE_STATE_ACTIVE);
581		}
582	}
583
584	return B_OK;
585}
586
587
/*!	Since \c UnmapPage() is called with \c preserveModified being \c true, the
	caller must hold the lock of the page's cache.
590*/
591static inline bool
592unmap_page(VMArea* area, addr_t virtualAddress)
593{
594	return area->address_space->TranslationMap()->UnmapPage(area,
595		virtualAddress, true);
596}
597
598
/*!	Since \c UnmapPages() is called with \c preserveModified being \c true,
	the caller must hold the locks of all mapped pages' caches.
601*/
602static inline void
603unmap_pages(VMArea* area, addr_t base, size_t size)
604{
605	area->address_space->TranslationMap()->UnmapPages(area, base, size, true);
606}
607
608
609/*!	Cuts a piece out of an area. If the given cut range covers the complete
610	area, it is deleted. If it covers the beginning or the end, the area is
611	resized accordingly. If the range covers some part in the middle of the
612	area, it is split in two; in this case the second area is returned via
613	\a _secondArea (the variable is left untouched in the other cases).
614	The address space must be write locked.
615	The caller must ensure that no part of the given range is wired.
616*/
617static status_t
618cut_area(VMAddressSpace* addressSpace, VMArea* area, addr_t address,
619	addr_t lastAddress, VMArea** _secondArea, bool kernel)
620{
621	// Does the cut range intersect with the area at all?
622	addr_t areaLast = area->Base() + (area->Size() - 1);
623	if (area->Base() > lastAddress || areaLast < address)
624		return B_OK;
625
626	// Is the area fully covered?
627	if (area->Base() >= address && areaLast <= lastAddress) {
628		delete_area(addressSpace, area, false);
629		return B_OK;
630	}
631
632	int priority;
633	uint32 allocationFlags;
634	if (addressSpace == VMAddressSpace::Kernel()) {
635		priority = VM_PRIORITY_SYSTEM;
636		allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
637			| HEAP_DONT_LOCK_KERNEL_SPACE;
638	} else {
639		priority = VM_PRIORITY_USER;
640		allocationFlags = 0;
641	}
642
643	VMCache* cache = vm_area_get_locked_cache(area);
644	VMCacheChainLocker cacheChainLocker(cache);
645	cacheChainLocker.LockAllSourceCaches();
646
647	// Cut the end only?
648	if (areaLast <= lastAddress) {
649		size_t oldSize = area->Size();
650		size_t newSize = address - area->Base();
651
652		status_t error = addressSpace->ShrinkAreaTail(area, newSize,
653			allocationFlags);
654		if (error != B_OK)
655			return error;
656
657		// unmap pages
658		unmap_pages(area, address, oldSize - newSize);
659
660		// If no one else uses the area's cache, we can resize it, too.
661		if (cache->areas == area && area->cache_next == NULL
662			&& cache->consumers.IsEmpty()
663			&& cache->type == CACHE_TYPE_RAM) {
664			// Since VMCache::Resize() can temporarily drop the lock, we must
665			// unlock all lower caches to prevent locking order inversion.
666			cacheChainLocker.Unlock(cache);
667			cache->Resize(cache->virtual_base + newSize, priority);
668			cache->ReleaseRefAndUnlock();
669		}
670
671		return B_OK;
672	}
673
674	// Cut the beginning only?
675	if (area->Base() >= address) {
676		addr_t oldBase = area->Base();
677		addr_t newBase = lastAddress + 1;
678		size_t newSize = areaLast - lastAddress;
679
680		// unmap pages
681		unmap_pages(area, oldBase, newBase - oldBase);
682
683		// resize the area
684		status_t error = addressSpace->ShrinkAreaHead(area, newSize,
685			allocationFlags);
686		if (error != B_OK)
687			return error;
688
689		// TODO: If no one else uses the area's cache, we should resize it, too!
690
691		area->cache_offset += newBase - oldBase;
692
693		return B_OK;
694	}
695
696	// The tough part -- cut a piece out of the middle of the area.
	// We do that by shrinking the area to the beginning section and creating
	// a new area for the end section.
699
700	addr_t firstNewSize = address - area->Base();
701	addr_t secondBase = lastAddress + 1;
702	addr_t secondSize = areaLast - lastAddress;
703
704	// unmap pages
705	unmap_pages(area, address, area->Size() - firstNewSize);
706
707	// resize the area
708	addr_t oldSize = area->Size();
709	status_t error = addressSpace->ShrinkAreaTail(area, firstNewSize,
710		allocationFlags);
711	if (error != B_OK)
712		return error;
713
714	// TODO: If no one else uses the area's cache, we might want to create a
715	// new cache for the second area, transfer the concerned pages from the
716	// first cache to it and resize the first cache.
717
718	// map the second area
719	virtual_address_restrictions addressRestrictions = {};
720	addressRestrictions.address = (void*)secondBase;
721	addressRestrictions.address_specification = B_EXACT_ADDRESS;
722	VMArea* secondArea;
723	error = map_backing_store(addressSpace, cache,
724		area->cache_offset + (secondBase - area->Base()), area->name,
725		secondSize, area->wiring, area->protection, REGION_NO_PRIVATE_MAP, 0,
726		&addressRestrictions, kernel, &secondArea, NULL);
727	if (error != B_OK) {
728		addressSpace->ShrinkAreaTail(area, oldSize, allocationFlags);
729		return error;
730	}
731
732	// We need a cache reference for the new area.
733	cache->AcquireRefLocked();
734
735	if (_secondArea != NULL)
736		*_secondArea = secondArea;
737
738	return B_OK;
739}
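

// Example of the three cut cases (illustrative addresses): for an area
// spanning [0x1000, 0x5000), cutting [0x2000, 0x3000) shrinks it to
// [0x1000, 0x2000) and returns a new area covering [0x3000, 0x5000) via
// \a _secondArea; cutting [0x4000, 0x6000) merely shrinks the tail to
// [0x1000, 0x4000); cutting [0x0000, 0x6000) deletes the area entirely.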
740
741
742/*!	Deletes all areas in the given address range.
743	The address space must be write-locked.
744	The caller must ensure that no part of the given range is wired.
745*/
746static status_t
747unmap_address_range(VMAddressSpace* addressSpace, addr_t address, addr_t size,
748	bool kernel)
749{
750	size = PAGE_ALIGN(size);
751	addr_t lastAddress = address + (size - 1);
752
	// Check whether the caller is allowed to modify the concerned areas.
754	if (!kernel) {
755		for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
756				VMArea* area = it.Next();) {
757			addr_t areaLast = area->Base() + (area->Size() - 1);
758			if (area->Base() < lastAddress && address < areaLast) {
759				if (area->address_space == VMAddressSpace::Kernel()) {
760					dprintf("unmap_address_range: team %" B_PRId32 " tried to "
761						"unmap range of kernel area %" B_PRId32 " (%s)\n",
762						team_get_current_team_id(), area->id, area->name);
763					return B_NOT_ALLOWED;
764				}
765			}
766		}
767	}
768
769	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
770			VMArea* area = it.Next();) {
771		addr_t areaLast = area->Base() + (area->Size() - 1);
772		if (area->Base() < lastAddress && address < areaLast) {
773			status_t error = cut_area(addressSpace, area, address,
774				lastAddress, NULL, kernel);
775			if (error != B_OK)
776				return error;
777				// Failing after already messing with areas is ugly, but we
778				// can't do anything about it.
779		}
780	}
781
782	return B_OK;
783}
784
785
786/*! You need to hold the lock of the cache and the write lock of the address
787	space when calling this function.
	Note that in case of error the cache will be temporarily unlocked.
789	If \a addressSpec is \c B_EXACT_ADDRESS and the
790	\c CREATE_AREA_UNMAP_ADDRESS_RANGE flag is specified, the caller must ensure
791	that no part of the specified address range (base \c *_virtualAddress, size
792	\a size) is wired.
793*/
794static status_t
795map_backing_store(VMAddressSpace* addressSpace, VMCache* cache, off_t offset,
796	const char* areaName, addr_t size, int wiring, int protection, int mapping,
797	uint32 flags, const virtual_address_restrictions* addressRestrictions,
798	bool kernel, VMArea** _area, void** _virtualAddress)
799{
800	TRACE(("map_backing_store: aspace %p, cache %p, virtual %p, offset 0x%"
801		B_PRIx64 ", size %" B_PRIuADDR ", addressSpec %" B_PRIu32 ", wiring %d"
802		", protection %d, area %p, areaName '%s'\n", addressSpace, cache,
803		addressRestrictions->address, offset, size,
804		addressRestrictions->address_specification, wiring, protection,
805		_area, areaName));
806	cache->AssertLocked();
807
808	if (size == 0) {
809#if KDEBUG
810		panic("map_backing_store(): called with size=0 for area '%s'!",
811			areaName);
812#endif
813		return B_BAD_VALUE;
814	}
815
816	uint32 allocationFlags = HEAP_DONT_WAIT_FOR_MEMORY
817		| HEAP_DONT_LOCK_KERNEL_SPACE;
818	int priority;
819	if (addressSpace != VMAddressSpace::Kernel()) {
820		priority = VM_PRIORITY_USER;
821	} else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0) {
822		priority = VM_PRIORITY_VIP;
823		allocationFlags |= HEAP_PRIORITY_VIP;
824	} else
825		priority = VM_PRIORITY_SYSTEM;
826
827	VMArea* area = addressSpace->CreateArea(areaName, wiring, protection,
828		allocationFlags);
829	if (area == NULL)
830		return B_NO_MEMORY;
831
832	status_t status;
833
834	// if this is a private map, we need to create a new cache
835	// to handle the private copies of pages as they are written to
836	VMCache* sourceCache = cache;
837	if (mapping == REGION_PRIVATE_MAP) {
838		VMCache* newCache;
839
840		// create an anonymous cache
841		status = VMCacheFactory::CreateAnonymousCache(newCache,
842			(protection & B_STACK_AREA) != 0
843				|| (protection & B_OVERCOMMITTING_AREA) != 0, 0,
844			cache->GuardSize() / B_PAGE_SIZE, true, VM_PRIORITY_USER);
845		if (status != B_OK)
846			goto err1;
847
848		newCache->Lock();
849		newCache->temporary = 1;
850		newCache->virtual_base = offset;
851		newCache->virtual_end = offset + size;
852
853		cache->AddConsumer(newCache);
854
855		cache = newCache;
856	}
857
858	if ((flags & CREATE_AREA_DONT_COMMIT_MEMORY) == 0) {
859		status = cache->SetMinimalCommitment(size, priority);
860		if (status != B_OK)
861			goto err2;
862	}
863
864	// check to see if this address space has entered DELETE state
865	if (addressSpace->IsBeingDeleted()) {
		// okay, someone is trying to delete this address space now, so we
		// can't insert the area; back out
868		status = B_BAD_TEAM_ID;
869		goto err2;
870	}
871
872	if (addressRestrictions->address_specification == B_EXACT_ADDRESS
873			&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0) {
874		status = unmap_address_range(addressSpace,
875			(addr_t)addressRestrictions->address, size, kernel);
876		if (status != B_OK)
877			goto err2;
878	}
879
880	status = addressSpace->InsertArea(area, size, addressRestrictions,
881		allocationFlags, _virtualAddress);
882	if (status == B_NO_MEMORY
883			&& addressRestrictions->address_specification == B_ANY_KERNEL_ADDRESS) {
884		// TODO: At present, there is no way to notify the low_resource monitor
		// that kernel address space is fragmented, nor does it check for this
886		// automatically. Due to how many locks are held, we cannot wait here
887		// for space to be freed up, but it would be good to at least notify
888		// that we tried and failed to allocate some amount.
889	}
890	if (status != B_OK)
891		goto err2;
892
893	// attach the cache to the area
894	area->cache = cache;
895	area->cache_offset = offset;
896
897	// point the cache back to the area
898	cache->InsertAreaLocked(area);
899	if (mapping == REGION_PRIVATE_MAP)
900		cache->Unlock();
901
902	// insert the area in the global area hash table
903	VMAreaHash::Insert(area);
904
905	// grab a ref to the address space (the area holds this)
906	addressSpace->Get();
907
908//	ktrace_printf("map_backing_store: cache: %p (source: %p), \"%s\" -> %p",
909//		cache, sourceCache, areaName, area);
910
911	*_area = area;
912	return B_OK;
913
914err2:
915	if (mapping == REGION_PRIVATE_MAP) {
		// We created this cache, so we must delete it again. Note that we
917		// need to temporarily unlock the source cache or we'll otherwise
918		// deadlock, since VMCache::_RemoveConsumer() will try to lock it, too.
919		sourceCache->Unlock();
920		cache->ReleaseRefAndUnlock();
921		sourceCache->Lock();
922	}
923err1:
924	addressSpace->DeleteArea(area, allocationFlags);
925	return status;
926}
927
928
929/*!	Equivalent to wait_if_area_range_is_wired(area, area->Base(), area->Size(),
930	  locker1, locker2).
931*/
932template<typename LockerType1, typename LockerType2>
933static inline bool
934wait_if_area_is_wired(VMArea* area, LockerType1* locker1, LockerType2* locker2)
935{
936	area->cache->AssertLocked();
937
938	VMAreaUnwiredWaiter waiter;
939	if (!area->AddWaiterIfWired(&waiter))
940		return false;
941
942	// unlock everything and wait
943	if (locker1 != NULL)
944		locker1->Unlock();
945	if (locker2 != NULL)
946		locker2->Unlock();
947
948	waiter.waitEntry.Wait();
949
950	return true;
951}
952
953
954/*!	Checks whether the given area has any wired ranges intersecting with the
955	specified range and waits, if so.
956
957	When it has to wait, the function calls \c Unlock() on both \a locker1
958	and \a locker2, if given.
959	The area's top cache must be locked and must be unlocked as a side effect
960	of calling \c Unlock() on either \a locker1 or \a locker2.
961
962	If the function does not have to wait it does not modify or unlock any
963	object.
964
965	\param area The area to be checked.
966	\param base The base address of the range to check.
967	\param size The size of the address range to check.
	\param locker1 An object to be unlocked before starting to wait (may
		be \c NULL).
	\param locker2 An object to be unlocked before starting to wait (may
		be \c NULL).
972	\return \c true, if the function had to wait, \c false otherwise.
973*/
974template<typename LockerType1, typename LockerType2>
975static inline bool
976wait_if_area_range_is_wired(VMArea* area, addr_t base, size_t size,
977	LockerType1* locker1, LockerType2* locker2)
978{
979	area->cache->AssertLocked();
980
981	VMAreaUnwiredWaiter waiter;
982	if (!area->AddWaiterIfWired(&waiter, base, size))
983		return false;
984
985	// unlock everything and wait
986	if (locker1 != NULL)
987		locker1->Unlock();
988	if (locker2 != NULL)
989		locker2->Unlock();
990
991	waiter.waitEntry.Wait();
992
993	return true;
994}
995
996
997/*!	Checks whether the given address space has any wired ranges intersecting
998	with the specified range and waits, if so.
999
1000	Similar to wait_if_area_range_is_wired(), with the following differences:
1001	- All areas intersecting with the range are checked (respectively all until
1002	  one is found that contains a wired range intersecting with the given
1003	  range).
1004	- The given address space must at least be read-locked and must be unlocked
1005	  when \c Unlock() is called on \a locker.
1006	- None of the areas' caches are allowed to be locked.
1007*/
1008template<typename LockerType>
1009static inline bool
1010wait_if_address_range_is_wired(VMAddressSpace* addressSpace, addr_t base,
1011	size_t size, LockerType* locker)
1012{
1013	addr_t end = base + size - 1;
1014	for (VMAddressSpace::AreaIterator it = addressSpace->GetAreaIterator();
1015			VMArea* area = it.Next();) {
1016		// TODO: Introduce a VMAddressSpace method to get a close iterator!
1017		if (area->Base() > end)
1018			return false;
1019
1020		if (base >= area->Base() + area->Size() - 1)
1021			continue;
1022
1023		AreaCacheLocker cacheLocker(vm_area_get_locked_cache(area));
1024
1025		if (wait_if_area_range_is_wired(area, base, size, locker, &cacheLocker))
1026			return true;
1027	}
1028
1029	return false;
1030}
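

// Since waiting unlocks the address space, callers retry in a loop until no
// wired range intersects the requested range anymore. This is the idiom used
// by the area creation functions further below (taken from
// vm_create_null_area()):
//
//	AddressSpaceWriteLocker locker;
//	do {
//		if (locker.SetTo(team) != B_OK)
//			return B_BAD_TEAM_ID;
//	} while (addressSpec == B_EXACT_ADDRESS
//		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
//		&& wait_if_address_range_is_wired(locker.AddressSpace(),
//			(addr_t)*address, size, &locker));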
1031
1032
1033/*!	Prepares an area to be used for vm_set_kernel_area_debug_protection().
1034	It must be called in a situation where the kernel address space may be
1035	locked.
1036*/
1037status_t
1038vm_prepare_kernel_area_debug_protection(area_id id, void** cookie)
1039{
1040	AddressSpaceReadLocker locker;
1041	VMArea* area;
1042	status_t status = locker.SetFromArea(id, area);
1043	if (status != B_OK)
1044		return status;
1045
1046	if (area->page_protections == NULL) {
1047		status = allocate_area_page_protections(area);
1048		if (status != B_OK)
1049			return status;
1050	}
1051
1052	*cookie = (void*)area;
1053	return B_OK;
1054}
1055
1056
1057/*!	This is a debug helper function that can only be used with very specific
1058	use cases.
1059	Sets protection for the given address range to the protection specified.
1060	If \a protection is 0 then the involved pages will be marked non-present
1061	in the translation map to cause a fault on access. The pages aren't
1062	actually unmapped however so that they can be marked present again with
1063	additional calls to this function. For this to work the area must be
1064	fully locked in memory so that the pages aren't otherwise touched.
1065	This function does not lock the kernel address space and needs to be
1066	supplied with a \a cookie retrieved from a successful call to
1067	vm_prepare_kernel_area_debug_protection().
1068*/
1069status_t
1070vm_set_kernel_area_debug_protection(void* cookie, void* _address, size_t size,
1071	uint32 protection)
1072{
1073	// check address range
1074	addr_t address = (addr_t)_address;
1075	size = PAGE_ALIGN(size);
1076
1077	if ((address % B_PAGE_SIZE) != 0
1078		|| (addr_t)address + size < (addr_t)address
1079		|| !IS_KERNEL_ADDRESS(address)
1080		|| !IS_KERNEL_ADDRESS((addr_t)address + size)) {
1081		return B_BAD_VALUE;
1082	}
1083
1084	// Translate the kernel protection to user protection as we only store that.
1085	if ((protection & B_KERNEL_READ_AREA) != 0)
1086		protection |= B_READ_AREA;
1087	if ((protection & B_KERNEL_WRITE_AREA) != 0)
1088		protection |= B_WRITE_AREA;
1089
1090	VMAddressSpace* addressSpace = VMAddressSpace::GetKernel();
1091	VMTranslationMap* map = addressSpace->TranslationMap();
1092	VMArea* area = (VMArea*)cookie;
1093
1094	addr_t offset = address - area->Base();
1095	if (area->Size() - offset < size) {
1096		panic("protect range not fully within supplied area");
1097		return B_BAD_VALUE;
1098	}
1099
1100	if (area->page_protections == NULL) {
1101		panic("area has no page protections");
1102		return B_BAD_VALUE;
1103	}
1104
	// Invalidate the mapping entries so any access to them will fault, or
	// restore the mapping entries unchanged so that lookups will succeed
	// again.
1107	map->Lock();
1108	map->DebugMarkRangePresent(address, address + size, protection != 0);
1109	map->Unlock();
1110
1111	// And set the proper page protections so that the fault case will actually
1112	// fail and not simply try to map a new page.
1113	for (addr_t pageAddress = address; pageAddress < address + size;
1114			pageAddress += B_PAGE_SIZE) {
1115		set_area_page_protection(area, pageAddress, protection);
1116	}
1117
1118	return B_OK;
1119}
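

// Usage sketch (hypothetical caller; "areaID" and "address" are assumed to
// name a fully locked kernel area and a page-aligned address within it):
//
//	void* cookie;
//	if (vm_prepare_kernel_area_debug_protection(areaID, &cookie) == B_OK) {
//		// make the page fault on any access ...
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE, 0);
//		// ... and later make it accessible again
//		vm_set_kernel_area_debug_protection(cookie, address, B_PAGE_SIZE,
//			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
//	}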
1120
1121
1122status_t
1123vm_block_address_range(const char* name, void* address, addr_t size)
1124{
1125	if (!arch_vm_supports_protection(0))
1126		return B_NOT_SUPPORTED;
1127
1128	AddressSpaceWriteLocker locker;
1129	status_t status = locker.SetTo(VMAddressSpace::KernelID());
1130	if (status != B_OK)
1131		return status;
1132
1133	VMAddressSpace* addressSpace = locker.AddressSpace();
1134
1135	// create an anonymous cache
1136	VMCache* cache;
1137	status = VMCacheFactory::CreateAnonymousCache(cache, false, 0, 0, false,
1138		VM_PRIORITY_SYSTEM);
1139	if (status != B_OK)
1140		return status;
1141
1142	cache->temporary = 1;
1143	cache->virtual_end = size;
1144	cache->Lock();
1145
1146	VMArea* area;
1147	virtual_address_restrictions addressRestrictions = {};
1148	addressRestrictions.address = address;
1149	addressRestrictions.address_specification = B_EXACT_ADDRESS;
1150	status = map_backing_store(addressSpace, cache, 0, name, size,
1151		B_ALREADY_WIRED, 0, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1152		true, &area, NULL);
1153	if (status != B_OK) {
1154		cache->ReleaseRefAndUnlock();
1155		return status;
1156	}
1157
1158	cache->Unlock();
1159	area->cache_type = CACHE_TYPE_RAM;
1160	return area->id;
1161}
1162
1163
1164status_t
1165vm_unreserve_address_range(team_id team, void* address, addr_t size)
1166{
1167	AddressSpaceWriteLocker locker(team);
1168	if (!locker.IsLocked())
1169		return B_BAD_TEAM_ID;
1170
1171	VMAddressSpace* addressSpace = locker.AddressSpace();
1172	return addressSpace->UnreserveAddressRange((addr_t)address, size,
1173		addressSpace == VMAddressSpace::Kernel()
1174			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0);
1175}
1176
1177
1178status_t
1179vm_reserve_address_range(team_id team, void** _address, uint32 addressSpec,
1180	addr_t size, uint32 flags)
1181{
1182	if (size == 0)
1183		return B_BAD_VALUE;
1184
1185	AddressSpaceWriteLocker locker(team);
1186	if (!locker.IsLocked())
1187		return B_BAD_TEAM_ID;
1188
1189	virtual_address_restrictions addressRestrictions = {};
1190	addressRestrictions.address = *_address;
1191	addressRestrictions.address_specification = addressSpec;
1192	VMAddressSpace* addressSpace = locker.AddressSpace();
1193	return addressSpace->ReserveAddressRange(size, &addressRestrictions, flags,
1194		addressSpace == VMAddressSpace::Kernel()
1195			? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0,
1196		_address);
1197}
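

// Usage sketch (hypothetical team and base address): reserve a range so that
// later area allocations won't be placed in it, and release it again when
// done. No special reservation flags are passed here.
//
//	void* base = (void*)0x60000000;
//	if (vm_reserve_address_range(team, &base, B_EXACT_ADDRESS,
//			16 * B_PAGE_SIZE, 0) == B_OK) {
//		// ...
//		vm_unreserve_address_range(team, base, 16 * B_PAGE_SIZE);
//	}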
1198
1199
1200area_id
1201vm_create_anonymous_area(team_id team, const char *name, addr_t size,
1202	uint32 wiring, uint32 protection, uint32 flags, addr_t guardSize,
1203	const virtual_address_restrictions* virtualAddressRestrictions,
1204	const physical_address_restrictions* physicalAddressRestrictions,
1205	bool kernel, void** _address)
1206{
1207	VMArea* area;
1208	VMCache* cache;
1209	vm_page* page = NULL;
1210	bool isStack = (protection & B_STACK_AREA) != 0;
1211	page_num_t guardPages;
1212	bool canOvercommit = false;
1213	uint32 pageAllocFlags = (flags & CREATE_AREA_DONT_CLEAR) == 0
1214		? VM_PAGE_ALLOC_CLEAR : 0;
1215
1216	TRACE(("create_anonymous_area [%" B_PRId32 "] %s: size 0x%" B_PRIxADDR "\n",
1217		team, name, size));
1218
1219	size = PAGE_ALIGN(size);
1220	guardSize = PAGE_ALIGN(guardSize);
1221	guardPages = guardSize / B_PAGE_SIZE;
1222
1223	if (size == 0 || size < guardSize)
1224		return B_BAD_VALUE;
1225	if (!arch_vm_supports_protection(protection))
1226		return B_NOT_SUPPORTED;
1227
1228	if (isStack || (protection & B_OVERCOMMITTING_AREA) != 0)
1229		canOvercommit = true;
1230
1231#ifdef DEBUG_KERNEL_STACKS
1232	if ((protection & B_KERNEL_STACK_AREA) != 0)
1233		isStack = true;
1234#endif
1235
1236	// check parameters
1237	switch (virtualAddressRestrictions->address_specification) {
1238		case B_ANY_ADDRESS:
1239		case B_EXACT_ADDRESS:
1240		case B_BASE_ADDRESS:
1241		case B_ANY_KERNEL_ADDRESS:
1242		case B_ANY_KERNEL_BLOCK_ADDRESS:
1243		case B_RANDOMIZED_ANY_ADDRESS:
1244		case B_RANDOMIZED_BASE_ADDRESS:
1245			break;
1246
1247		default:
1248			return B_BAD_VALUE;
1249	}
1250
1251	// If low or high physical address restrictions are given, we force
	// B_CONTIGUOUS wiring, since only then will we use
1253	// vm_page_allocate_page_run() which deals with those restrictions.
1254	if (physicalAddressRestrictions->low_address != 0
1255		|| physicalAddressRestrictions->high_address != 0) {
1256		wiring = B_CONTIGUOUS;
1257	}
1258
1259	physical_address_restrictions stackPhysicalRestrictions;
1260	bool doReserveMemory = false;
1261	switch (wiring) {
1262		case B_NO_LOCK:
1263			break;
1264		case B_FULL_LOCK:
1265		case B_LAZY_LOCK:
1266		case B_CONTIGUOUS:
1267			doReserveMemory = true;
1268			break;
1269		case B_ALREADY_WIRED:
1270			break;
1271		case B_LOMEM:
1272			stackPhysicalRestrictions = *physicalAddressRestrictions;
1273			stackPhysicalRestrictions.high_address = 16 * 1024 * 1024;
1274			physicalAddressRestrictions = &stackPhysicalRestrictions;
1275			wiring = B_CONTIGUOUS;
1276			doReserveMemory = true;
1277			break;
1278		case B_32_BIT_FULL_LOCK:
1279			if (B_HAIKU_PHYSICAL_BITS <= 32
1280				|| (uint64)vm_page_max_address() < (uint64)1 << 32) {
1281				wiring = B_FULL_LOCK;
1282				doReserveMemory = true;
1283				break;
1284			}
1285			// TODO: We don't really support this mode efficiently. Just fall
1286			// through for now ...
1287		case B_32_BIT_CONTIGUOUS:
1288			#if B_HAIKU_PHYSICAL_BITS > 32
1289				if (vm_page_max_address() >= (phys_addr_t)1 << 32) {
1290					stackPhysicalRestrictions = *physicalAddressRestrictions;
1291					stackPhysicalRestrictions.high_address
1292						= (phys_addr_t)1 << 32;
1293					physicalAddressRestrictions = &stackPhysicalRestrictions;
1294				}
1295			#endif
1296			wiring = B_CONTIGUOUS;
1297			doReserveMemory = true;
1298			break;
1299		default:
1300			return B_BAD_VALUE;
1301	}
1302
1303	// Optimization: For a single-page contiguous allocation without low/high
1304	// memory restriction B_FULL_LOCK wiring suffices.
1305	if (wiring == B_CONTIGUOUS && size == B_PAGE_SIZE
1306		&& physicalAddressRestrictions->low_address == 0
1307		&& physicalAddressRestrictions->high_address == 0) {
1308		wiring = B_FULL_LOCK;
1309	}
1310
1311	// For full lock or contiguous areas we're also going to map the pages and
1312	// thus need to reserve pages for the mapping backend upfront.
1313	addr_t reservedMapPages = 0;
1314	if (wiring == B_FULL_LOCK || wiring == B_CONTIGUOUS) {
1315		AddressSpaceWriteLocker locker;
1316		status_t status = locker.SetTo(team);
1317		if (status != B_OK)
1318			return status;
1319
1320		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1321		reservedMapPages = map->MaxPagesNeededToMap(0, size - 1);
1322	}
1323
1324	int priority;
1325	if (team != VMAddressSpace::KernelID())
1326		priority = VM_PRIORITY_USER;
1327	else if ((flags & CREATE_AREA_PRIORITY_VIP) != 0)
1328		priority = VM_PRIORITY_VIP;
1329	else
1330		priority = VM_PRIORITY_SYSTEM;
1331
1332	// Reserve memory before acquiring the address space lock. This reduces the
1333	// chances of failure, since while holding the write lock to the address
1334	// space (if it is the kernel address space that is), the low memory handler
1335	// won't be able to free anything for us.
1336	addr_t reservedMemory = 0;
1337	if (doReserveMemory) {
1338		bigtime_t timeout = (flags & CREATE_AREA_DONT_WAIT) != 0 ? 0 : 1000000;
1339		if (vm_try_reserve_memory(size, priority, timeout) != B_OK)
1340			return B_NO_MEMORY;
1341		reservedMemory = size;
		// TODO: We don't reserve the memory for the pages for the page
		// directories/tables. We actually need to do so, since we currently
		// don't reclaim them (and probably can't reclaim all of them anyway).
		// Thus there are actually fewer physical pages than there should be,
		// which can get the VM into trouble in low memory situations.
1347	}
1348
1349	AddressSpaceWriteLocker locker;
1350	VMAddressSpace* addressSpace;
1351	status_t status;
1352
1353	// For full lock areas reserve the pages before locking the address
1354	// space. E.g. block caches can't release their memory while we hold the
1355	// address space lock.
1356	page_num_t reservedPages = reservedMapPages;
1357	if (wiring == B_FULL_LOCK)
1358		reservedPages += size / B_PAGE_SIZE;
1359
1360	vm_page_reservation reservation;
1361	if (reservedPages > 0) {
1362		if ((flags & CREATE_AREA_DONT_WAIT) != 0) {
1363			if (!vm_page_try_reserve_pages(&reservation, reservedPages,
1364					priority)) {
1365				reservedPages = 0;
1366				status = B_WOULD_BLOCK;
1367				goto err0;
1368			}
1369		} else
1370			vm_page_reserve_pages(&reservation, reservedPages, priority);
1371	}
1372
1373	if (wiring == B_CONTIGUOUS) {
1374		// we try to allocate the page run here upfront as this may easily
1375		// fail for obvious reasons
1376		page = vm_page_allocate_page_run(PAGE_STATE_WIRED | pageAllocFlags,
1377			size / B_PAGE_SIZE, physicalAddressRestrictions, priority);
1378		if (page == NULL) {
1379			status = B_NO_MEMORY;
1380			goto err0;
1381		}
1382	}
1383
1384	// Lock the address space and, if B_EXACT_ADDRESS and
1385	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1386	// is not wired.
1387	do {
1388		status = locker.SetTo(team);
1389		if (status != B_OK)
1390			goto err1;
1391
1392		addressSpace = locker.AddressSpace();
1393	} while (virtualAddressRestrictions->address_specification
1394			== B_EXACT_ADDRESS
1395		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1396		&& wait_if_address_range_is_wired(addressSpace,
1397			(addr_t)virtualAddressRestrictions->address, size, &locker));
1398
1399	// create an anonymous cache
	// if it's a stack, make sure that at least two pages are available
1401	status = VMCacheFactory::CreateAnonymousCache(cache, canOvercommit,
1402		isStack ? (min_c(2, size / B_PAGE_SIZE - guardPages)) : 0, guardPages,
1403		wiring == B_NO_LOCK, priority);
1404	if (status != B_OK)
1405		goto err1;
1406
1407	cache->temporary = 1;
1408	cache->virtual_end = size;
1409	cache->committed_size = reservedMemory;
1410		// TODO: This should be done via a method.
1411	reservedMemory = 0;
1412
1413	cache->Lock();
1414
1415	status = map_backing_store(addressSpace, cache, 0, name, size, wiring,
1416		protection, REGION_NO_PRIVATE_MAP, flags, virtualAddressRestrictions,
1417		kernel, &area, _address);
1418
1419	if (status != B_OK) {
1420		cache->ReleaseRefAndUnlock();
1421		goto err1;
1422	}
1423
1424	locker.DegradeToReadLock();
1425
1426	switch (wiring) {
1427		case B_NO_LOCK:
1428		case B_LAZY_LOCK:
1429			// do nothing - the pages are mapped in as needed
1430			break;
1431
1432		case B_FULL_LOCK:
1433		{
1434			// Allocate and map all pages for this area
1435
1436			off_t offset = 0;
1437			for (addr_t address = area->Base();
1438					address < area->Base() + (area->Size() - 1);
1439					address += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1440#ifdef DEBUG_KERNEL_STACKS
1441#	ifdef STACK_GROWS_DOWNWARDS
1442				if (isStack && address < area->Base()
1443						+ KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1444#	else
1445				if (isStack && address >= area->Base() + area->Size()
1446						- KERNEL_STACK_GUARD_PAGES * B_PAGE_SIZE)
1447#	endif
1448					continue;
1449#endif
1450				vm_page* page = vm_page_allocate_page(&reservation,
1451					PAGE_STATE_WIRED | pageAllocFlags);
1452				cache->InsertPage(page, offset);
1453				map_page(area, page, address, protection, &reservation);
1454
1455				DEBUG_PAGE_ACCESS_END(page);
1456			}
1457
1458			break;
1459		}
1460
1461		case B_ALREADY_WIRED:
1462		{
1463			// The pages should already be mapped. This is only really useful
1464			// during boot time. Find the appropriate vm_page objects and stick
1465			// them in the cache object.
1466			VMTranslationMap* map = addressSpace->TranslationMap();
1467			off_t offset = 0;
1468
1469			if (!gKernelStartup)
1470				panic("ALREADY_WIRED flag used outside kernel startup\n");
1471
1472			map->Lock();
1473
1474			for (addr_t virtualAddress = area->Base();
1475					virtualAddress < area->Base() + (area->Size() - 1);
1476					virtualAddress += B_PAGE_SIZE, offset += B_PAGE_SIZE) {
1477				phys_addr_t physicalAddress;
1478				uint32 flags;
1479				status = map->Query(virtualAddress, &physicalAddress, &flags);
1480				if (status < B_OK) {
1481					panic("looking up mapping failed for va 0x%lx\n",
1482						virtualAddress);
1483				}
1484				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1485				if (page == NULL) {
1486					panic("looking up page failed for pa %#" B_PRIxPHYSADDR
1487						"\n", physicalAddress);
1488				}
1489
1490				DEBUG_PAGE_ACCESS_START(page);
1491
1492				cache->InsertPage(page, offset);
1493				increment_page_wired_count(page);
1494				vm_page_set_state(page, PAGE_STATE_WIRED);
1495				page->busy = false;
1496
1497				DEBUG_PAGE_ACCESS_END(page);
1498			}
1499
1500			map->Unlock();
1501			break;
1502		}
1503
1504		case B_CONTIGUOUS:
1505		{
			// We have already allocated our contiguous page run, so we can
			// now just map the pages in the address space
1508			VMTranslationMap* map = addressSpace->TranslationMap();
1509			phys_addr_t physicalAddress
1510				= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
1511			addr_t virtualAddress = area->Base();
1512			off_t offset = 0;
1513
1514			map->Lock();
1515
1516			for (virtualAddress = area->Base(); virtualAddress < area->Base()
1517					+ (area->Size() - 1); virtualAddress += B_PAGE_SIZE,
1518					offset += B_PAGE_SIZE, physicalAddress += B_PAGE_SIZE) {
1519				page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
1520				if (page == NULL)
1521					panic("couldn't lookup physical page just allocated\n");
1522
1523				status = map->Map(virtualAddress, physicalAddress, protection,
1524					area->MemoryType(), &reservation);
1525				if (status < B_OK)
1526					panic("couldn't map physical page in page run\n");
1527
1528				cache->InsertPage(page, offset);
1529				increment_page_wired_count(page);
1530
1531				DEBUG_PAGE_ACCESS_END(page);
1532			}
1533
1534			map->Unlock();
1535			break;
1536		}
1537
1538		default:
1539			break;
1540	}
1541
1542	cache->Unlock();
1543
1544	if (reservedPages > 0)
1545		vm_page_unreserve_pages(&reservation);
1546
1547	TRACE(("vm_create_anonymous_area: done\n"));
1548
1549	area->cache_type = CACHE_TYPE_RAM;
1550	return area->id;
1551
1552err1:
1553	if (wiring == B_CONTIGUOUS) {
1554		// we had reserved the area space upfront...
1555		phys_addr_t pageNumber = page->physical_page_number;
1556		int32 i;
1557		for (i = size / B_PAGE_SIZE; i-- > 0; pageNumber++) {
1558			page = vm_lookup_page(pageNumber);
1559			if (page == NULL)
1560				panic("couldn't lookup physical page just allocated\n");
1561
1562			vm_page_set_state(page, PAGE_STATE_FREE);
1563		}
1564	}
1565
1566err0:
1567	if (reservedPages > 0)
1568		vm_page_unreserve_pages(&reservation);
1569	if (reservedMemory > 0)
1570		vm_unreserve_memory(reservedMemory);
1571
1572	return status;
1573}
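

// Usage sketch (hypothetical kernel caller and area name): create a fully
// locked area whose pages are cleared, using the restriction structures this
// function takes.
//
//	virtual_address_restrictions virtualRestrictions = {};
//	virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
//	physical_address_restrictions physicalRestrictions = {};
//	void* address;
//	area_id area = vm_create_anonymous_area(VMAddressSpace::KernelID(),
//		"example area", 4 * B_PAGE_SIZE, B_FULL_LOCK,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0, 0, &virtualRestrictions,
//		&physicalRestrictions, true, &address);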
1574
1575
1576area_id
1577vm_map_physical_memory(team_id team, const char* name, void** _address,
1578	uint32 addressSpec, addr_t size, uint32 protection,
1579	phys_addr_t physicalAddress, bool alreadyWired)
1580{
1581	VMArea* area;
1582	VMCache* cache;
1583	addr_t mapOffset;
1584
1585	TRACE(("vm_map_physical_memory(aspace = %" B_PRId32 ", \"%s\", virtual = %p"
1586		", spec = %" B_PRIu32 ", size = %" B_PRIxADDR ", protection = %"
1587		B_PRIu32 ", phys = %#" B_PRIxPHYSADDR ")\n", team, name, *_address,
1588		addressSpec, size, protection, physicalAddress));
1589
1590	if (!arch_vm_supports_protection(protection))
1591		return B_NOT_SUPPORTED;
1592
1593	AddressSpaceWriteLocker locker(team);
1594	if (!locker.IsLocked())
1595		return B_BAD_TEAM_ID;
1596
	// if the physical address is not page aligned,
	// move the actual area down to align on a page boundary
1599	mapOffset = physicalAddress % B_PAGE_SIZE;
1600	size += mapOffset;
1601	physicalAddress -= mapOffset;
1602
1603	size = PAGE_ALIGN(size);
1604
1605	// create a device cache
1606	status_t status = VMCacheFactory::CreateDeviceCache(cache, physicalAddress);
1607	if (status != B_OK)
1608		return status;
1609
1610	cache->virtual_end = size;
1611
1612	cache->Lock();
1613
1614	virtual_address_restrictions addressRestrictions = {};
1615	addressRestrictions.address = *_address;
1616	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1617	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1618		B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0, &addressRestrictions,
1619		true, &area, _address);
1620
1621	if (status < B_OK)
1622		cache->ReleaseRefLocked();
1623
1624	cache->Unlock();
1625
1626	if (status == B_OK) {
1627		// set requested memory type -- use uncached, if not given
1628		uint32 memoryType = addressSpec & B_MTR_MASK;
1629		if (memoryType == 0)
1630			memoryType = B_MTR_UC;
1631
1632		area->SetMemoryType(memoryType);
1633
1634		status = arch_vm_set_memory_type(area, physicalAddress, memoryType);
1635		if (status != B_OK)
1636			delete_area(locker.AddressSpace(), area, false);
1637	}
1638
1639	if (status != B_OK)
1640		return status;
1641
1642	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1643
1644	if (alreadyWired) {
1645		// The area is already mapped, but possibly not with the right
1646		// memory type.
1647		map->Lock();
1648		map->ProtectArea(area, area->protection);
1649		map->Unlock();
1650	} else {
1651		// Map the area completely.
1652
1653		// reserve pages needed for the mapping
1654		size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1655			area->Base() + (size - 1));
1656		vm_page_reservation reservation;
1657		vm_page_reserve_pages(&reservation, reservePages,
1658			team == VMAddressSpace::KernelID()
1659				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1660
1661		map->Lock();
1662
1663		for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1664			map->Map(area->Base() + offset, physicalAddress + offset,
1665				protection, area->MemoryType(), &reservation);
1666		}
1667
1668		map->Unlock();
1669
1670		vm_page_unreserve_pages(&reservation);
1671	}
1672
1673	// modify the pointer returned to be offset back into the new area
	// the same way the physical address passed in was offset
1675	*_address = (void*)((addr_t)*_address + mapOffset);
1676
1677	area->cache_type = CACHE_TYPE_DEVICE;
1678	return area->id;
1679}
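

// Usage sketch (hypothetical driver values, e.g. the physical address
// 0xfe000000): map a page-aligned MMIO range into the kernel address space.
// The memory type defaults to uncached (B_MTR_UC) unless a B_MTR_* flag is
// OR'ed into the address specification.
//
//	void* registers;
//	area_id area = vm_map_physical_memory(VMAddressSpace::KernelID(),
//		"example mmio", &registers, B_ANY_KERNEL_ADDRESS, B_PAGE_SIZE,
//		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0xfe000000, false);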
1680
1681
1682/*!	Don't use!
1683	TODO: This function was introduced to map physical page vecs to
1684	contiguous virtual memory in IOBuffer::GetNextVirtualVec(). It does
1685	use a device cache and does not track vm_page::wired_count!
1686*/
1687area_id
1688vm_map_physical_memory_vecs(team_id team, const char* name, void** _address,
1689	uint32 addressSpec, addr_t* _size, uint32 protection,
1690	struct generic_io_vec* vecs, uint32 vecCount)
1691{
1692	TRACE(("vm_map_physical_memory_vecs(team = %" B_PRId32 ", \"%s\", virtual "
1693		"= %p, spec = %" B_PRIu32 ", _size = %p, protection = %" B_PRIu32 ", "
1694		"vecs = %p, vecCount = %" B_PRIu32 ")\n", team, name, *_address,
1695		addressSpec, _size, protection, vecs, vecCount));
1696
1697	if (!arch_vm_supports_protection(protection)
1698		|| (addressSpec & B_MTR_MASK) != 0) {
1699		return B_NOT_SUPPORTED;
1700	}
1701
1702	AddressSpaceWriteLocker locker(team);
1703	if (!locker.IsLocked())
1704		return B_BAD_TEAM_ID;
1705
1706	if (vecCount == 0)
1707		return B_BAD_VALUE;
1708
1709	addr_t size = 0;
1710	for (uint32 i = 0; i < vecCount; i++) {
1711		if (vecs[i].base % B_PAGE_SIZE != 0
1712			|| vecs[i].length % B_PAGE_SIZE != 0) {
1713			return B_BAD_VALUE;
1714		}
1715
1716		size += vecs[i].length;
1717	}
1718
1719	// create a device cache
1720	VMCache* cache;
1721	status_t result = VMCacheFactory::CreateDeviceCache(cache, vecs[0].base);
1722	if (result != B_OK)
1723		return result;
1724
1725	cache->virtual_end = size;
1726
1727	cache->Lock();
1728
1729	VMArea* area;
1730	virtual_address_restrictions addressRestrictions = {};
1731	addressRestrictions.address = *_address;
1732	addressRestrictions.address_specification = addressSpec & ~B_MTR_MASK;
1733	result = map_backing_store(locker.AddressSpace(), cache, 0, name,
1734		size, B_FULL_LOCK, protection, REGION_NO_PRIVATE_MAP, 0,
1735		&addressRestrictions, true, &area, _address);
1736
1737	if (result != B_OK)
1738		cache->ReleaseRefLocked();
1739
1740	cache->Unlock();
1741
1742	if (result != B_OK)
1743		return result;
1744
1745	VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1746	size_t reservePages = map->MaxPagesNeededToMap(area->Base(),
1747		area->Base() + (size - 1));
1748
1749	vm_page_reservation reservation;
1750	vm_page_reserve_pages(&reservation, reservePages,
1751			team == VMAddressSpace::KernelID()
1752				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1753	map->Lock();
1754
1755	uint32 vecIndex = 0;
1756	size_t vecOffset = 0;
1757	for (addr_t offset = 0; offset < size; offset += B_PAGE_SIZE) {
1758		while (vecOffset >= vecs[vecIndex].length && vecIndex < vecCount) {
1759			vecOffset = 0;
1760			vecIndex++;
1761		}
1762
1763		if (vecIndex >= vecCount)
1764			break;
1765
1766		map->Map(area->Base() + offset, vecs[vecIndex].base + vecOffset,
1767			protection, area->MemoryType(), &reservation);
1768
1769		vecOffset += B_PAGE_SIZE;
1770	}
1771
1772	map->Unlock();
1773	vm_page_unreserve_pages(&reservation);
1774
1775	if (_size != NULL)
1776		*_size = size;
1777
1778	area->cache_type = CACHE_TYPE_DEVICE;
1779	return area->id;
1780}
1781
1782
1783area_id
1784vm_create_null_area(team_id team, const char* name, void** address,
1785	uint32 addressSpec, addr_t size, uint32 flags)
1786{
1787	size = PAGE_ALIGN(size);
1788
1789	// Lock the address space and, if B_EXACT_ADDRESS and
1790	// CREATE_AREA_UNMAP_ADDRESS_RANGE were specified, ensure the address range
1791	// is not wired.
1792	AddressSpaceWriteLocker locker;
1793	do {
1794		if (locker.SetTo(team) != B_OK)
1795			return B_BAD_TEAM_ID;
1796	} while (addressSpec == B_EXACT_ADDRESS
1797		&& (flags & CREATE_AREA_UNMAP_ADDRESS_RANGE) != 0
1798		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1799			(addr_t)*address, size, &locker));
1800
1801	// create a null cache
1802	int priority = (flags & CREATE_AREA_PRIORITY_VIP) != 0
1803		? VM_PRIORITY_VIP : VM_PRIORITY_SYSTEM;
1804	VMCache* cache;
1805	status_t status = VMCacheFactory::CreateNullCache(priority, cache);
1806	if (status != B_OK)
1807		return status;
1808
1809	cache->temporary = 1;
1810	cache->virtual_end = size;
1811
1812	cache->Lock();
1813
1814	VMArea* area;
1815	virtual_address_restrictions addressRestrictions = {};
1816	addressRestrictions.address = *address;
1817	addressRestrictions.address_specification = addressSpec;
1818	status = map_backing_store(locker.AddressSpace(), cache, 0, name, size,
1819		B_LAZY_LOCK, B_KERNEL_READ_AREA, REGION_NO_PRIVATE_MAP, flags,
1820		&addressRestrictions, true, &area, address);
1821
1822	if (status < B_OK) {
1823		cache->ReleaseRefAndUnlock();
1824		return status;
1825	}
1826
1827	cache->Unlock();
1828
1829	area->cache_type = CACHE_TYPE_NULL;
1830	return area->id;
1831}
1832
1833
1834/*!	Creates the vnode cache for the specified \a vnode.
1835	The vnode has to be marked busy when calling this function.
1836*/
1837status_t
1838vm_create_vnode_cache(struct vnode* vnode, struct VMCache** cache)
1839{
1840	return VMCacheFactory::CreateVnodeCache(*cache, vnode);
1841}
1842
1843
1844/*!	\a cache must be locked. The area's address space must be read-locked.
1845*/
1846static void
1847pre_map_area_pages(VMArea* area, VMCache* cache,
1848	vm_page_reservation* reservation)
1849{
1850	addr_t baseAddress = area->Base();
1851	addr_t cacheOffset = area->cache_offset;
1852	page_num_t firstPage = cacheOffset / B_PAGE_SIZE;
1853	page_num_t endPage = firstPage + area->Size() / B_PAGE_SIZE;
1854
1855	for (VMCachePagesTree::Iterator it
1856				= cache->pages.GetIterator(firstPage, true, true);
1857			vm_page* page = it.Next();) {
1858		if (page->cache_offset >= endPage)
1859			break;
1860
1861		// skip busy and inactive pages
1862		if (page->busy || page->usage_count == 0)
1863			continue;
1864
1865		DEBUG_PAGE_ACCESS_START(page);
1866		map_page(area, page,
1867			baseAddress + (page->cache_offset * B_PAGE_SIZE - cacheOffset),
1868			B_READ_AREA | B_KERNEL_READ_AREA, reservation);
1869		DEBUG_PAGE_ACCESS_END(page);
1870	}
1871}
1872
1873
1874/*!	Will map the file specified by \a fd to an area in memory.
1875	The file will be mirrored beginning at the specified \a offset. The
1876	\a offset and \a size arguments have to be page aligned.
1877*/
1878static area_id
1879_vm_map_file(team_id team, const char* name, void** _address,
1880	uint32 addressSpec, size_t size, uint32 protection, uint32 mapping,
1881	bool unmapAddressRange, int fd, off_t offset, bool kernel)
1882{
	// TODO: for binary files, we want to make sure that they get a
	//	consistent copy of the file as of mapping time, i.e. later changes
	//	to the file should not make it into the mapped copy -- this will
	//	need quite some changes to be done in a nice way
1887	TRACE(("_vm_map_file(fd = %d, offset = %" B_PRIdOFF ", size = %lu, mapping "
1888		"%" B_PRIu32 ")\n", fd, offset, size, mapping));
1889
1890	offset = ROUNDDOWN(offset, B_PAGE_SIZE);
1891	size = PAGE_ALIGN(size);
1892
1893	if (mapping == REGION_NO_PRIVATE_MAP)
1894		protection |= B_SHARED_AREA;
1895	if (addressSpec != B_EXACT_ADDRESS)
1896		unmapAddressRange = false;
1897
1898	if (fd < 0) {
1899		uint32 flags = unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0;
1900		virtual_address_restrictions virtualRestrictions = {};
1901		virtualRestrictions.address = *_address;
1902		virtualRestrictions.address_specification = addressSpec;
1903		physical_address_restrictions physicalRestrictions = {};
1904		return vm_create_anonymous_area(team, name, size, B_NO_LOCK, protection,
1905			flags, 0, &virtualRestrictions, &physicalRestrictions, kernel,
1906			_address);
1907	}
1908
1909	// get the open flags of the FD
1910	file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
1911	if (descriptor == NULL)
1912		return EBADF;
1913	int32 openMode = descriptor->open_mode;
1914	put_fd(descriptor);
1915
	// The FD must be open for reading in any case. For a shared mapping with
	// write access, the FD must additionally be open for writing.
1918	if ((openMode & O_ACCMODE) == O_WRONLY
1919		|| (mapping == REGION_NO_PRIVATE_MAP
1920			&& (protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0
1921			&& (openMode & O_ACCMODE) == O_RDONLY)) {
1922		return EACCES;
1923	}
1924
1925	// get the vnode for the object, this also grabs a ref to it
1926	struct vnode* vnode = NULL;
1927	status_t status = vfs_get_vnode_from_fd(fd, kernel, &vnode);
1928	if (status < B_OK)
1929		return status;
1930	CObjectDeleter<struct vnode> vnodePutter(vnode, vfs_put_vnode);
1931
1932	// If we're going to pre-map pages, we need to reserve the pages needed by
1933	// the mapping backend upfront.
1934	page_num_t reservedPreMapPages = 0;
1935	vm_page_reservation reservation;
1936	if ((protection & B_READ_AREA) != 0) {
1937		AddressSpaceWriteLocker locker;
1938		status = locker.SetTo(team);
1939		if (status != B_OK)
1940			return status;
1941
1942		VMTranslationMap* map = locker.AddressSpace()->TranslationMap();
1943		reservedPreMapPages = map->MaxPagesNeededToMap(0, size - 1);
1944
1945		locker.Unlock();
1946
1947		vm_page_reserve_pages(&reservation, reservedPreMapPages,
1948			team == VMAddressSpace::KernelID()
1949				? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
1950	}
1951
1952	struct PageUnreserver {
1953		PageUnreserver(vm_page_reservation* reservation)
1954			:
1955			fReservation(reservation)
1956		{
1957		}
1958
1959		~PageUnreserver()
1960		{
1961			if (fReservation != NULL)
1962				vm_page_unreserve_pages(fReservation);
1963		}
1964
1965		vm_page_reservation* fReservation;
1966	} pageUnreserver(reservedPreMapPages > 0 ? &reservation : NULL);
1967
1968	// Lock the address space and, if the specified address range shall be
1969	// unmapped, ensure it is not wired.
1970	AddressSpaceWriteLocker locker;
1971	do {
1972		if (locker.SetTo(team) != B_OK)
1973			return B_BAD_TEAM_ID;
1974	} while (unmapAddressRange
1975		&& wait_if_address_range_is_wired(locker.AddressSpace(),
1976			(addr_t)*_address, size, &locker));
1977
1978	// TODO: this only works for file systems that use the file cache
1979	VMCache* cache;
1980	status = vfs_get_vnode_cache(vnode, &cache, false);
1981	if (status < B_OK)
1982		return status;
1983
1984	cache->Lock();
1985
1986	VMArea* area;
1987	virtual_address_restrictions addressRestrictions = {};
1988	addressRestrictions.address = *_address;
1989	addressRestrictions.address_specification = addressSpec;
1990	status = map_backing_store(locker.AddressSpace(), cache, offset, name, size,
1991		0, protection, mapping,
1992		unmapAddressRange ? CREATE_AREA_UNMAP_ADDRESS_RANGE : 0,
1993		&addressRestrictions, kernel, &area, _address);
1994
1995	if (status != B_OK || mapping == REGION_PRIVATE_MAP) {
1996		// map_backing_store() cannot know we no longer need the ref
1997		cache->ReleaseRefLocked();
1998	}
1999
2000	if (status == B_OK && (protection & B_READ_AREA) != 0)
2001		pre_map_area_pages(area, cache, &reservation);
2002
2003	cache->Unlock();
2004
2005	if (status == B_OK) {
2006		// TODO: this probably deserves a smarter solution, ie. don't always
2007		// prefetch stuff, and also, probably don't trigger it at this place.
2008		cache_prefetch_vnode(vnode, offset, min_c(size, 10LL * 1024 * 1024));
2009			// prefetches at max 10 MB starting from "offset"
2010	}
2011
2012	if (status != B_OK)
2013		return status;
2014
2015	area->cache_type = CACHE_TYPE_VNODE;
2016	return area->id;
2017}
2018
2019
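/*!	Kernel-exported version of _vm_map_file(): maps the file referred to by
	\a fd into the address space of team \a aid, after checking that the
	requested \a protection is supported by the architecture.
*/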
2020area_id
2021vm_map_file(team_id aid, const char* name, void** address, uint32 addressSpec,
2022	addr_t size, uint32 protection, uint32 mapping, bool unmapAddressRange,
2023	int fd, off_t offset)
2024{
2025	if (!arch_vm_supports_protection(protection))
2026		return B_NOT_SUPPORTED;
2027
2028	return _vm_map_file(aid, name, address, addressSpec, size, protection,
2029		mapping, unmapAddressRange, fd, offset, true);
2030}
2031
2032
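/*!	Returns the area's cache, locked and with a reference acquired.
	Since an area's cache can be replaced concurrently, the function retries
	until the cache it has locked is still the area's current cache.
	The caller has to call vm_area_put_locked_cache() when done with it.
*/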
2033VMCache*
2034vm_area_get_locked_cache(VMArea* area)
2035{
2036	rw_lock_read_lock(&sAreaCacheLock);
2037
2038	while (true) {
2039		VMCache* cache = area->cache;
2040
2041		if (!cache->SwitchFromReadLock(&sAreaCacheLock)) {
2042			// cache has been deleted
2043			rw_lock_read_lock(&sAreaCacheLock);
2044			continue;
2045		}
2046
2047		rw_lock_read_lock(&sAreaCacheLock);
2048
2049		if (cache == area->cache) {
2050			cache->AcquireRefLocked();
2051			rw_lock_read_unlock(&sAreaCacheLock);
2052			return cache;
2053		}
2054
2055		// the cache changed in the meantime
2056		cache->Unlock();
2057	}
2058}
2059
2060
2061void
2062vm_area_put_locked_cache(VMCache* cache)
2063{
2064	cache->ReleaseRefAndUnlock();
2065}
2066
2067
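/*!	Clones the area with ID \a sourceID into the address space of \a team.
	The new area shares the source area's cache; both areas are marked
	B_SHARED_AREA. Unless \a kernel is \c true, cloning into a different
	address space requires the source area to be marked B_CLONEABLE_AREA.
	For B_FULL_LOCK areas, the pages (respectively the physical range for
	device areas) are mapped in right away.
*/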
2068area_id
2069vm_clone_area(team_id team, const char* name, void** address,
2070	uint32 addressSpec, uint32 protection, uint32 mapping, area_id sourceID,
2071	bool kernel)
2072{
2073	VMArea* newArea = NULL;
2074	VMArea* sourceArea;
2075
2076	// Check whether the source area exists and is cloneable. If so, mark it
2077	// B_SHARED_AREA, so that we don't get problems with copy-on-write.
2078	{
2079		AddressSpaceWriteLocker locker;
2080		status_t status = locker.SetFromArea(sourceID, sourceArea);
2081		if (status != B_OK)
2082			return status;
2083
2084		sourceArea->protection |= B_SHARED_AREA;
2085		protection |= B_SHARED_AREA;
2086	}
2087
2088	// Now lock both address spaces and actually do the cloning.
2089
2090	MultiAddressSpaceLocker locker;
2091	VMAddressSpace* sourceAddressSpace;
2092	status_t status = locker.AddArea(sourceID, false, &sourceAddressSpace);
2093	if (status != B_OK)
2094		return status;
2095
2096	VMAddressSpace* targetAddressSpace;
2097	status = locker.AddTeam(team, true, &targetAddressSpace);
2098	if (status != B_OK)
2099		return status;
2100
2101	status = locker.Lock();
2102	if (status != B_OK)
2103		return status;
2104
2105	sourceArea = lookup_area(sourceAddressSpace, sourceID);
2106	if (sourceArea == NULL)
2107		return B_BAD_VALUE;
2108
2109	VMCache* cache = vm_area_get_locked_cache(sourceArea);
2110
2111	if (!kernel && sourceAddressSpace != targetAddressSpace
2112		&& (sourceArea->protection & B_CLONEABLE_AREA) == 0) {
2113#if KDEBUG
2114		Team* team = thread_get_current_thread()->team;
2115		dprintf("team \"%s\" (%" B_PRId32 ") attempted to clone area \"%s\" (%"
2116			B_PRId32 ")!\n", team->Name(), team->id, sourceArea->name, sourceID);
2117#endif
2118		status = B_NOT_ALLOWED;
2119	} else if (sourceArea->cache_type == CACHE_TYPE_NULL) {
2120		status = B_NOT_ALLOWED;
2121	} else {
2122		virtual_address_restrictions addressRestrictions = {};
2123		addressRestrictions.address = *address;
2124		addressRestrictions.address_specification = addressSpec;
2125		status = map_backing_store(targetAddressSpace, cache,
2126			sourceArea->cache_offset, name, sourceArea->Size(),
2127			sourceArea->wiring, protection, mapping, 0, &addressRestrictions,
2128			kernel, &newArea, address);
2129	}
2130	if (status == B_OK && mapping != REGION_PRIVATE_MAP) {
2131		// If the mapping is REGION_PRIVATE_MAP, map_backing_store() needed
2132		// to create a new cache, and has therefore already acquired a reference
2133		// to the source cache - but otherwise it has no idea that we need
2134		// one.
2135		cache->AcquireRefLocked();
2136	}
2137	if (status == B_OK && newArea->wiring == B_FULL_LOCK) {
2138		// we need to map in everything at this point
2139		if (sourceArea->cache_type == CACHE_TYPE_DEVICE) {
2140			// we don't have actual pages to map but a physical area
2141			VMTranslationMap* map
2142				= sourceArea->address_space->TranslationMap();
2143			map->Lock();
2144
2145			phys_addr_t physicalAddress;
2146			uint32 oldProtection;
2147			map->Query(sourceArea->Base(), &physicalAddress, &oldProtection);
2148
2149			map->Unlock();
2150
2151			map = targetAddressSpace->TranslationMap();
2152			size_t reservePages = map->MaxPagesNeededToMap(newArea->Base(),
2153				newArea->Base() + (newArea->Size() - 1));
2154
2155			vm_page_reservation reservation;
2156			vm_page_reserve_pages(&reservation, reservePages,
2157				targetAddressSpace == VMAddressSpace::Kernel()
2158					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2159			map->Lock();
2160
2161			for (addr_t offset = 0; offset < newArea->Size();
2162					offset += B_PAGE_SIZE) {
2163				map->Map(newArea->Base() + offset, physicalAddress + offset,
2164					protection, newArea->MemoryType(), &reservation);
2165			}
2166
2167			map->Unlock();
2168			vm_page_unreserve_pages(&reservation);
2169		} else {
2170			VMTranslationMap* map = targetAddressSpace->TranslationMap();
2171			size_t reservePages = map->MaxPagesNeededToMap(
2172				newArea->Base(), newArea->Base() + (newArea->Size() - 1));
2173			vm_page_reservation reservation;
2174			vm_page_reserve_pages(&reservation, reservePages,
2175				targetAddressSpace == VMAddressSpace::Kernel()
2176					? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2177
2178			// map in all pages from source
2179			for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2180					vm_page* page  = it.Next();) {
2181				if (!page->busy) {
2182					DEBUG_PAGE_ACCESS_START(page);
2183					map_page(newArea, page,
2184						newArea->Base() + ((page->cache_offset << PAGE_SHIFT)
2185							- newArea->cache_offset),
2186						protection, &reservation);
2187					DEBUG_PAGE_ACCESS_END(page);
2188				}
2189			}
2190			// TODO: B_FULL_LOCK means that all pages are locked. We are not
2191			// ensuring that!
2192
2193			vm_page_unreserve_pages(&reservation);
2194		}
2195	}
2196	if (status == B_OK)
2197		newArea->cache_type = sourceArea->cache_type;
2198
2199	vm_area_put_locked_cache(cache);
2200
2201	if (status < B_OK)
2202		return status;
2203
2204	return newArea->id;
2205}
2206
2207
2208/*!	Deletes the specified area of the given address space.
2209
2210	The address space must be write-locked.
2211	The caller must ensure that the area does not have any wired ranges.
2212
2213	\param addressSpace The address space containing the area.
2214	\param area The area to be deleted.
2215	\param deletingAddressSpace \c true, if the address space is in the process
2216		of being deleted.
2217*/
2218static void
2219delete_area(VMAddressSpace* addressSpace, VMArea* area,
2220	bool deletingAddressSpace)
2221{
2222	ASSERT(!area->IsWired());
2223
2224	VMAreaHash::Remove(area);
2225
2226	// At this point the area is removed from the global hash table, but
2227	// still exists in the area list.
2228
2229	// Unmap the virtual address space the area occupied.
2230	{
2231		// We need to lock the complete cache chain.
2232		VMCache* topCache = vm_area_get_locked_cache(area);
2233		VMCacheChainLocker cacheChainLocker(topCache);
2234		cacheChainLocker.LockAllSourceCaches();
2235
2236		// If the area's top cache is a temporary cache and the area is the only
2237		// one referencing it (besides us currently holding a second reference),
2238		// the unmapping code doesn't need to care about preserving the accessed
2239		// and dirty flags of the top cache page mappings.
2240		bool ignoreTopCachePageFlags
2241			= topCache->temporary && topCache->RefCount() == 2;
2242
2243		area->address_space->TranslationMap()->UnmapArea(area,
2244			deletingAddressSpace, ignoreTopCachePageFlags);
2245	}
2246
2247	if (!area->cache->temporary)
2248		area->cache->WriteModified();
2249
2250	uint32 allocationFlags = addressSpace == VMAddressSpace::Kernel()
2251		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
2252
2253	arch_vm_unset_memory_type(area);
2254	addressSpace->RemoveArea(area, allocationFlags);
2255	addressSpace->Put();
2256
2257	area->cache->RemoveArea(area);
2258	area->cache->ReleaseRef();
2259
2260	addressSpace->DeleteArea(area, allocationFlags);
2261}
2262
2263
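/*!	Deletes the area with ID \a id in the address space of team \a team,
	waiting for any wired ranges within the area to be unwired first.
*/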
2264status_t
2265vm_delete_area(team_id team, area_id id, bool kernel)
2266{
2267	TRACE(("vm_delete_area(team = 0x%" B_PRIx32 ", area = 0x%" B_PRIx32 ")\n",
2268		team, id));
2269
2270	// lock the address space and make sure the area isn't wired
2271	AddressSpaceWriteLocker locker;
2272	VMArea* area;
2273	AreaCacheLocker cacheLocker;
2274
2275	do {
2276		status_t status = locker.SetFromArea(team, id, area);
2277		if (status != B_OK)
2278			return status;
2279
2280		cacheLocker.SetTo(area);
2281	} while (wait_if_area_is_wired(area, &locker, &cacheLocker));
2282
2283	cacheLocker.Unlock();
2284
2285	// SetFromArea will have returned an error if the area's owning team is not
2286	// the same as the passed team, so we don't need to do those checks here.
2287
2288	delete_area(locker.AddressSpace(), area, false);
2289	return B_OK;
2290}
2291
2292
2293/*!	Creates a new cache on top of given cache, moves all areas from
2294	the old cache to the new one, and changes the protection of all affected
2295	areas' pages to read-only. If requested, wired pages are moved up to the
2296	new cache and copies are added to the old cache in their place.
2297	Preconditions:
2298	- The given cache must be locked.
2299	- All of the cache's areas' address spaces must be read locked.
2300	- Either the cache must not have any wired ranges or a page reservation for
2301	  all wired pages must be provided, so they can be copied.
2302
2303	\param lowerCache The cache on top of which a new cache shall be created.
2304	\param wiredPagesReservation If \c NULL there must not be any wired pages
2305		in \a lowerCache. Otherwise as many pages must be reserved as the cache
		has wired pages. The wired pages are copied in this case.
2307*/
2308static status_t
2309vm_copy_on_write_area(VMCache* lowerCache,
2310	vm_page_reservation* wiredPagesReservation)
2311{
2312	VMCache* upperCache;
2313
2314	TRACE(("vm_copy_on_write_area(cache = %p)\n", lowerCache));
2315
2316	// We need to separate the cache from its areas. The cache goes one level
	// deeper and we create a new cache in between.
2318
2319	// create an anonymous cache
2320	status_t status = VMCacheFactory::CreateAnonymousCache(upperCache, false, 0,
2321		lowerCache->GuardSize() / B_PAGE_SIZE,
2322		dynamic_cast<VMAnonymousNoSwapCache*>(lowerCache) == NULL,
2323		VM_PRIORITY_USER);
2324	if (status != B_OK)
2325		return status;
2326
2327	upperCache->Lock();
2328
2329	upperCache->temporary = 1;
2330	upperCache->virtual_base = lowerCache->virtual_base;
2331	upperCache->virtual_end = lowerCache->virtual_end;
2332
2333	// transfer the lower cache areas to the upper cache
2334	rw_lock_write_lock(&sAreaCacheLock);
2335	upperCache->TransferAreas(lowerCache);
2336	rw_lock_write_unlock(&sAreaCacheLock);
2337
2338	lowerCache->AddConsumer(upperCache);
2339
2340	// We now need to remap all pages from all of the cache's areas read-only,
2341	// so that a copy will be created on next write access. If there are wired
2342	// pages, we keep their protection, move them to the upper cache and create
2343	// copies for the lower cache.
2344	if (wiredPagesReservation != NULL) {
2345		// We need to handle wired pages -- iterate through the cache's pages.
2346		for (VMCachePagesTree::Iterator it = lowerCache->pages.GetIterator();
2347				vm_page* page = it.Next();) {
2348			if (page->WiredCount() > 0) {
2349				// allocate a new page and copy the wired one
2350				vm_page* copiedPage = vm_page_allocate_page(
2351					wiredPagesReservation, PAGE_STATE_ACTIVE);
2352
2353				vm_memcpy_physical_page(
2354					copiedPage->physical_page_number * B_PAGE_SIZE,
2355					page->physical_page_number * B_PAGE_SIZE);
2356
2357				// move the wired page to the upper cache (note: removing is OK
2358				// with the SplayTree iterator) and insert the copy
2359				upperCache->MovePage(page);
2360				lowerCache->InsertPage(copiedPage,
2361					page->cache_offset * B_PAGE_SIZE);
2362
2363				DEBUG_PAGE_ACCESS_END(copiedPage);
2364			} else {
2365				// Change the protection of this page in all areas.
2366				for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2367						tempArea = tempArea->cache_next) {
2368					// The area must be readable in the same way it was
2369					// previously writable.
2370					uint32 protection = B_KERNEL_READ_AREA;
2371					if ((tempArea->protection & B_READ_AREA) != 0)
2372						protection |= B_READ_AREA;
2373
2374					VMTranslationMap* map
2375						= tempArea->address_space->TranslationMap();
2376					map->Lock();
2377					map->ProtectPage(tempArea,
2378						virtual_page_address(tempArea, page), protection);
2379					map->Unlock();
2380				}
2381			}
2382		}
2383	} else {
2384		ASSERT(lowerCache->WiredPagesCount() == 0);
2385
2386		// just change the protection of all areas
2387		for (VMArea* tempArea = upperCache->areas; tempArea != NULL;
2388				tempArea = tempArea->cache_next) {
2389			// The area must be readable in the same way it was previously
2390			// writable.
2391			uint32 protection = B_KERNEL_READ_AREA;
2392			if ((tempArea->protection & B_READ_AREA) != 0)
2393				protection |= B_READ_AREA;
2394
2395			VMTranslationMap* map = tempArea->address_space->TranslationMap();
2396			map->Lock();
2397			map->ProtectArea(tempArea, protection);
2398			map->Unlock();
2399		}
2400	}
2401
2402	vm_area_put_locked_cache(upperCache);
2403
2404	return B_OK;
2405}
2406
2407
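/*!	Copies the area with ID \a sourceID into the address space of \a team.
	If the source area is shared, the new area reuses the source's cache;
	otherwise, if the source area is writable, it is switched to copy-on-write
	via vm_copy_on_write_area(), with wired pages being copied eagerly.
	\return The ID of the newly created area, or an error code.
*/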
2408area_id
2409vm_copy_area(team_id team, const char* name, void** _address,
2410	uint32 addressSpec, uint32 protection, area_id sourceID)
2411{
	bool writableCopy
		= (protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0;
2413
2414	if ((protection & B_KERNEL_PROTECTION) == 0) {
2415		// set the same protection for the kernel as for userland
2416		protection |= B_KERNEL_READ_AREA;
2417		if (writableCopy)
2418			protection |= B_KERNEL_WRITE_AREA;
2419	}
2420
2421	// Do the locking: target address space, all address spaces associated with
2422	// the source cache, and the cache itself.
2423	MultiAddressSpaceLocker locker;
2424	VMAddressSpace* targetAddressSpace;
2425	VMCache* cache;
2426	VMArea* source;
2427	AreaCacheLocker cacheLocker;
2428	status_t status;
2429	bool sharedArea;
2430
2431	page_num_t wiredPages = 0;
2432	vm_page_reservation wiredPagesReservation;
2433
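	// Determining the number of wired pages requires the cache to be locked,
	// but reserving pages for copying them may block. Hence we reserve with
	// the locks dropped and restart whenever more wired pages turn up than
	// we have reserved so far.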
2434	bool restart;
2435	do {
2436		restart = false;
2437
2438		locker.Unset();
2439		status = locker.AddTeam(team, true, &targetAddressSpace);
2440		if (status == B_OK) {
2441			status = locker.AddAreaCacheAndLock(sourceID, false, false, source,
2442				&cache);
2443		}
2444		if (status != B_OK)
2445			return status;
2446
2447		cacheLocker.SetTo(cache, true);	// already locked
2448
2449		sharedArea = (source->protection & B_SHARED_AREA) != 0;
2450
2451		page_num_t oldWiredPages = wiredPages;
2452		wiredPages = 0;
2453
2454		// If the source area isn't shared, count the number of wired pages in
2455		// the cache and reserve as many pages.
2456		if (!sharedArea) {
2457			wiredPages = cache->WiredPagesCount();
2458
2459			if (wiredPages > oldWiredPages) {
2460				cacheLocker.Unlock();
2461				locker.Unlock();
2462
2463				if (oldWiredPages > 0)
2464					vm_page_unreserve_pages(&wiredPagesReservation);
2465
2466				vm_page_reserve_pages(&wiredPagesReservation, wiredPages,
2467					VM_PRIORITY_USER);
2468
2469				restart = true;
2470			}
2471		} else if (oldWiredPages > 0)
2472			vm_page_unreserve_pages(&wiredPagesReservation);
2473	} while (restart);
2474
2475	// unreserve pages later
2476	struct PagesUnreserver {
2477		PagesUnreserver(vm_page_reservation* reservation)
2478			:
2479			fReservation(reservation)
2480		{
2481		}
2482
2483		~PagesUnreserver()
2484		{
2485			if (fReservation != NULL)
2486				vm_page_unreserve_pages(fReservation);
2487		}
2488
2489	private:
2490		vm_page_reservation*	fReservation;
2491	} pagesUnreserver(wiredPages > 0 ? &wiredPagesReservation : NULL);
2492
2493	if (addressSpec == B_CLONE_ADDRESS) {
2494		addressSpec = B_EXACT_ADDRESS;
2495		*_address = (void*)source->Base();
2496	}
2497
	// First, create a cache on top of the source area, or use the existing
	// one if this is a shared area.
2500
2501	VMArea* target;
2502	virtual_address_restrictions addressRestrictions = {};
2503	addressRestrictions.address = *_address;
2504	addressRestrictions.address_specification = addressSpec;
2505	status = map_backing_store(targetAddressSpace, cache, source->cache_offset,
2506		name, source->Size(), source->wiring, protection,
2507		sharedArea ? REGION_NO_PRIVATE_MAP : REGION_PRIVATE_MAP,
2508		writableCopy ? 0 : CREATE_AREA_DONT_COMMIT_MEMORY,
2509		&addressRestrictions, true, &target, _address);
2510	if (status < B_OK)
2511		return status;
2512
2513	if (sharedArea) {
2514		// The new area uses the old area's cache, but map_backing_store()
2515		// hasn't acquired a ref. So we have to do that now.
2516		cache->AcquireRefLocked();
2517	}
2518
2519	// If the source area is writable, we need to move it one layer up as well
2520
2521	if (!sharedArea) {
2522		if ((source->protection & (B_KERNEL_WRITE_AREA | B_WRITE_AREA)) != 0) {
2523			// TODO: do something more useful if this fails!
2524			if (vm_copy_on_write_area(cache,
2525					wiredPages > 0 ? &wiredPagesReservation : NULL) < B_OK) {
2526				panic("vm_copy_on_write_area() failed!\n");
2527			}
2528		}
2529	}
2530
2531	// we return the ID of the newly created area
2532	return target->id;
2533}
2534
2535
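/*!	Changes the protection of the area with ID \a areaID to \a newProtection.
	Unless \a kernel is \c true, kernel areas cannot be changed, and the area
	must belong to \a team (unless \a team is the kernel team). Existing page
	mappings are re-protected accordingly; if a read-only area whose cache has
	consumers becomes writable, a new cache is inserted via
	vm_copy_on_write_area().
*/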
2536status_t
2537vm_set_area_protection(team_id team, area_id areaID, uint32 newProtection,
2538	bool kernel)
2539{
2540	fix_protection(&newProtection);
2541
2542	TRACE(("vm_set_area_protection(team = %#" B_PRIx32 ", area = %#" B_PRIx32
2543		", protection = %#" B_PRIx32 ")\n", team, areaID, newProtection));
2544
2545	if (!arch_vm_supports_protection(newProtection))
2546		return B_NOT_SUPPORTED;
2547
2548	bool becomesWritable
2549		= (newProtection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2550
2551	// lock address spaces and cache
2552	MultiAddressSpaceLocker locker;
2553	VMCache* cache;
2554	VMArea* area;
2555	status_t status;
2556	AreaCacheLocker cacheLocker;
2557	bool isWritable;
2558
2559	bool restart;
2560	do {
2561		restart = false;
2562
2563		locker.Unset();
2564		status = locker.AddAreaCacheAndLock(areaID, true, false, area, &cache);
2565		if (status != B_OK)
2566			return status;
2567
2568		cacheLocker.SetTo(cache, true);	// already locked
2569
2570		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
2571			dprintf("vm_set_area_protection: team %" B_PRId32 " tried to "
2572				"set protection %#" B_PRIx32 " on kernel area %" B_PRId32
2573				" (%s)\n", team, newProtection, areaID, area->name);
2574			return B_NOT_ALLOWED;
2575		}
2576
2577		if (area->protection == newProtection)
2578			return B_OK;
2579
2580		if (team != VMAddressSpace::KernelID()
2581			&& area->address_space->ID() != team) {
2582			// unless you're the kernel, you are only allowed to set
2583			// the protection of your own areas
2584			return B_NOT_ALLOWED;
2585		}
2586
2587		isWritable
2588			= (area->protection & (B_WRITE_AREA | B_KERNEL_WRITE_AREA)) != 0;
2589
		// Make sure the area (or, if we're going to call
2591		// vm_copy_on_write_area(), all areas of the cache) doesn't have any
2592		// wired ranges.
2593		if (!isWritable && becomesWritable && !cache->consumers.IsEmpty()) {
2594			for (VMArea* otherArea = cache->areas; otherArea != NULL;
2595					otherArea = otherArea->cache_next) {
2596				if (wait_if_area_is_wired(otherArea, &locker, &cacheLocker)) {
2597					restart = true;
2598					break;
2599				}
2600			}
2601		} else {
2602			if (wait_if_area_is_wired(area, &locker, &cacheLocker))
2603				restart = true;
2604		}
2605	} while (restart);
2606
2607	bool changePageProtection = true;
2608	bool changeTopCachePagesOnly = false;
2609
2610	if (isWritable && !becomesWritable) {
2611		// writable -> !writable
2612
2613		if (cache->source != NULL && cache->temporary) {
2614			if (cache->CountWritableAreas(area) == 0) {
				// Since this cache is now backed by the pages of its source
				// cache, we can change its commitment to cover only the pages
				// that are actually in this cache.
2618
2619				status = cache->Commit(cache->page_count * B_PAGE_SIZE,
2620					team == VMAddressSpace::KernelID()
2621						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2622
2623				// TODO: we may be able to join with our source cache, if
2624				// count == 0
2625			}
2626		}
2627
2628		// If only the writability changes, we can just remap the pages of the
2629		// top cache, since the pages of lower caches are mapped read-only
		// anyway. That's only advantageous if the number of pages in the cache
2631		// is significantly smaller than the number of pages in the area,
2632		// though.
2633		if (newProtection
2634				== (area->protection & ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA))
2635			&& cache->page_count * 2 < area->Size() / B_PAGE_SIZE) {
2636			changeTopCachePagesOnly = true;
2637		}
2638	} else if (!isWritable && becomesWritable) {
2639		// !writable -> writable
2640
2641		if (!cache->consumers.IsEmpty()) {
2642			// There are consumers -- we have to insert a new cache. Fortunately
2643			// vm_copy_on_write_area() does everything that's needed.
2644			changePageProtection = false;
2645			status = vm_copy_on_write_area(cache, NULL);
2646		} else {
2647			// No consumers, so we don't need to insert a new one.
2648			if (cache->source != NULL && cache->temporary) {
2649				// the cache's commitment must contain all possible pages
2650				status = cache->Commit(cache->virtual_end - cache->virtual_base,
2651					team == VMAddressSpace::KernelID()
2652						? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
2653			}
2654
2655			if (status == B_OK && cache->source != NULL) {
2656				// There's a source cache, hence we can't just change all pages'
2657				// protection or we might allow writing into pages belonging to
2658				// a lower cache.
2659				changeTopCachePagesOnly = true;
2660			}
2661		}
2662	} else {
2663		// we don't have anything special to do in all other cases
2664	}
2665
2666	if (status == B_OK) {
2667		// remap existing pages in this cache
2668		if (changePageProtection) {
2669			VMTranslationMap* map = area->address_space->TranslationMap();
2670			map->Lock();
2671
2672			if (changeTopCachePagesOnly) {
2673				page_num_t firstPageOffset = area->cache_offset / B_PAGE_SIZE;
2674				page_num_t lastPageOffset
2675					= firstPageOffset + area->Size() / B_PAGE_SIZE;
2676				for (VMCachePagesTree::Iterator it = cache->pages.GetIterator();
2677						vm_page* page = it.Next();) {
2678					if (page->cache_offset >= firstPageOffset
2679						&& page->cache_offset <= lastPageOffset) {
2680						addr_t address = virtual_page_address(area, page);
2681						map->ProtectPage(area, address, newProtection);
2682					}
2683				}
2684			} else
2685				map->ProtectArea(area, newProtection);
2686
2687			map->Unlock();
2688		}
2689
2690		area->protection = newProtection;
2691	}
2692
2693	return status;
2694}
2695
2696
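/*!	Queries the translation map of team \a team for the physical address that
	\a vaddr is mapped to and stores it in \a paddr.
*/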
2697status_t
2698vm_get_page_mapping(team_id team, addr_t vaddr, phys_addr_t* paddr)
2699{
2700	VMAddressSpace* addressSpace = VMAddressSpace::Get(team);
2701	if (addressSpace == NULL)
2702		return B_BAD_TEAM_ID;
2703
2704	VMTranslationMap* map = addressSpace->TranslationMap();
2705
2706	map->Lock();
2707	uint32 dummyFlags;
2708	status_t status = map->Query(vaddr, paddr, &dummyFlags);
2709	map->Unlock();
2710
2711	addressSpace->Put();
2712	return status;
2713}
2714
2715
2716/*!	The page's cache must be locked.
2717*/
2718bool
2719vm_test_map_modification(vm_page* page)
2720{
2721	if (page->modified)
2722		return true;
2723
2724	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2725	vm_page_mapping* mapping;
2726	while ((mapping = iterator.Next()) != NULL) {
2727		VMArea* area = mapping->area;
2728		VMTranslationMap* map = area->address_space->TranslationMap();
2729
2730		phys_addr_t physicalAddress;
2731		uint32 flags;
2732		map->Lock();
2733		map->Query(virtual_page_address(area, page), &physicalAddress, &flags);
2734		map->Unlock();
2735
2736		if ((flags & PAGE_MODIFIED) != 0)
2737			return true;
2738	}
2739
2740	return false;
2741}
2742
2743
2744/*!	The page's cache must be locked.
2745*/
2746void
2747vm_clear_map_flags(vm_page* page, uint32 flags)
2748{
2749	if ((flags & PAGE_ACCESSED) != 0)
2750		page->accessed = false;
2751	if ((flags & PAGE_MODIFIED) != 0)
2752		page->modified = false;
2753
2754	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2755	vm_page_mapping* mapping;
2756	while ((mapping = iterator.Next()) != NULL) {
2757		VMArea* area = mapping->area;
2758		VMTranslationMap* map = area->address_space->TranslationMap();
2759
2760		map->Lock();
2761		map->ClearFlags(virtual_page_address(area, page), flags);
2762		map->Unlock();
2763	}
2764}
2765
2766
2767/*!	Removes all mappings from a page.
2768	After you've called this function, the page is unmapped from memory and
2769	the page's \c accessed and \c modified flags have been updated according
2770	to the state of the mappings.
2771	The page's cache must be locked.
2772*/
2773void
2774vm_remove_all_page_mappings(vm_page* page)
2775{
2776	while (vm_page_mapping* mapping = page->mappings.Head()) {
2777		VMArea* area = mapping->area;
2778		VMTranslationMap* map = area->address_space->TranslationMap();
2779		addr_t address = virtual_page_address(area, page);
2780		map->UnmapPage(area, address, false);
2781	}
2782}
2783
2784
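/*!	Clears the accessed flags of all of the page's mappings and of the page
	itself, merging the mappings' modified state into the page's \c modified
	flag.
	\return The number of accessed flags that were set, including the page's
		own \c accessed flag.
	The page's cache must be locked.
*/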
2785int32
2786vm_clear_page_mapping_accessed_flags(struct vm_page *page)
2787{
2788	int32 count = 0;
2789
2790	vm_page_mappings::Iterator iterator = page->mappings.GetIterator();
2791	vm_page_mapping* mapping;
2792	while ((mapping = iterator.Next()) != NULL) {
2793		VMArea* area = mapping->area;
2794		VMTranslationMap* map = area->address_space->TranslationMap();
2795
2796		bool modified;
2797		if (map->ClearAccessedAndModified(area,
2798				virtual_page_address(area, page), false, modified)) {
2799			count++;
2800		}
2801
2802		page->modified |= modified;
2803	}
2804
2806	if (page->accessed) {
2807		count++;
2808		page->accessed = false;
2809	}
2810
2811	return count;
2812}
2813
2814
2815/*!	Removes all mappings of a page and/or clears the accessed bits of the
2816	mappings.
2817	The function iterates through the page mappings and removes them until
2818	encountering one that has been accessed. From then on it will continue to
2819	iterate, but only clear the accessed flag of the mapping. The page's
2820	\c modified bit will be updated accordingly, the \c accessed bit will be
2821	cleared.
2822	\return The number of mapping accessed bits encountered, including the
2823		\c accessed bit of the page itself. If \c 0 is returned, all mappings
2824		of the page have been removed.
2825*/
2826int32
2827vm_remove_all_page_mappings_if_unaccessed(struct vm_page *page)
2828{
2829	ASSERT(page->WiredCount() == 0);
2830
2831	if (page->accessed)
2832		return vm_clear_page_mapping_accessed_flags(page);
2833
2834	while (vm_page_mapping* mapping = page->mappings.Head()) {
2835		VMArea* area = mapping->area;
2836		VMTranslationMap* map = area->address_space->TranslationMap();
2837		addr_t address = virtual_page_address(area, page);
2838		bool modified = false;
2839		if (map->ClearAccessedAndModified(area, address, true, modified)) {
2840			page->accessed = true;
2841			page->modified |= modified;
2842			return vm_clear_page_mapping_accessed_flags(page);
2843		}
2844		page->modified |= modified;
2845	}
2846
2847	return 0;
2848}
2849
2850
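/*!	Debugger command backing "dl", "dw", "ds", "db" and "string": dumps memory
	at the given address (virtual, or physical with -p/--physical) in items of
	8, 4, 2 or 1 byte(s), or as a NUL-terminated string.
*/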
2851static int
2852display_mem(int argc, char** argv)
2853{
2854	bool physical = false;
2855	addr_t copyAddress;
2856	int32 displayWidth;
2857	int32 itemSize;
2858	int32 num = -1;
2859	addr_t address;
2860	int i = 1, j;
2861
2862	if (argc > 1 && argv[1][0] == '-') {
2863		if (!strcmp(argv[1], "-p") || !strcmp(argv[1], "--physical")) {
2864			physical = true;
2865			i++;
2866		} else
2867			i = 99;
2868	}
2869
2870	if (argc < i + 1 || argc > i + 2) {
2871		kprintf("usage: dl/dw/ds/db/string [-p|--physical] <address> [num]\n"
2872			"\tdl - 8 bytes\n"
2873			"\tdw - 4 bytes\n"
2874			"\tds - 2 bytes\n"
2875			"\tdb - 1 byte\n"
2876			"\tstring - a whole string\n"
2877			"  -p or --physical only allows memory from a single page to be "
2878			"displayed.\n");
2879		return 0;
2880	}
2881
2882	address = parse_expression(argv[i]);
2883
2884	if (argc > i + 1)
2885		num = parse_expression(argv[i + 1]);
2886
2887	// build the format string
2888	if (strcmp(argv[0], "db") == 0) {
2889		itemSize = 1;
2890		displayWidth = 16;
2891	} else if (strcmp(argv[0], "ds") == 0) {
2892		itemSize = 2;
2893		displayWidth = 8;
2894	} else if (strcmp(argv[0], "dw") == 0) {
2895		itemSize = 4;
2896		displayWidth = 4;
2897	} else if (strcmp(argv[0], "dl") == 0) {
2898		itemSize = 8;
2899		displayWidth = 2;
2900	} else if (strcmp(argv[0], "string") == 0) {
2901		itemSize = 1;
2902		displayWidth = -1;
2903	} else {
2904		kprintf("display_mem called in an invalid way!\n");
2905		return 0;
2906	}
2907
2908	if (num <= 0)
2909		num = displayWidth;
2910
2911	void* physicalPageHandle = NULL;
2912
2913	if (physical) {
2914		int32 offset = address & (B_PAGE_SIZE - 1);
2915		if (num * itemSize + offset > B_PAGE_SIZE) {
2916			num = (B_PAGE_SIZE - offset) / itemSize;
2917			kprintf("NOTE: number of bytes has been cut to page size\n");
2918		}
2919
2920		address = ROUNDDOWN(address, B_PAGE_SIZE);
2921
2922		if (vm_get_physical_page_debug(address, &copyAddress,
2923				&physicalPageHandle) != B_OK) {
2924			kprintf("getting the hardware page failed.");
2925			return 0;
2926		}
2927
2928		address += offset;
2929		copyAddress += offset;
2930	} else
2931		copyAddress = address;
2932
2933	if (!strcmp(argv[0], "string")) {
2934		kprintf("%p \"", (char*)copyAddress);
2935
2936		// string mode
2937		for (i = 0; true; i++) {
2938			char c;
2939			if (debug_memcpy(B_CURRENT_TEAM, &c, (char*)copyAddress + i, 1)
2940					!= B_OK
2941				|| c == '\0') {
2942				break;
2943			}
2944
2945			if (c == '\n')
2946				kprintf("\\n");
2947			else if (c == '\t')
2948				kprintf("\\t");
2949			else {
2950				if (!isprint(c))
2951					c = '.';
2952
2953				kprintf("%c", c);
2954			}
2955		}
2956
2957		kprintf("\"\n");
2958	} else {
2959		// number mode
2960		for (i = 0; i < num; i++) {
2961			uint64 value;
2962
2963			if ((i % displayWidth) == 0) {
2964				int32 displayed = min_c(displayWidth, (num-i)) * itemSize;
2965				if (i != 0)
2966					kprintf("\n");
2967
2968				kprintf("[0x%lx]  ", address + i * itemSize);
2969
2970				for (j = 0; j < displayed; j++) {
2971					char c;
2972					if (debug_memcpy(B_CURRENT_TEAM, &c,
2973							(char*)copyAddress + i * itemSize + j, 1) != B_OK) {
2974						displayed = j;
2975						break;
2976					}
2977					if (!isprint(c))
2978						c = '.';
2979
2980					kprintf("%c", c);
2981				}
2982				if (num > displayWidth) {
2983					// make sure the spacing in the last line is correct
2984					for (j = displayed; j < displayWidth * itemSize; j++)
2985						kprintf(" ");
2986				}
2987				kprintf("  ");
2988			}
2989
2990			if (debug_memcpy(B_CURRENT_TEAM, &value,
2991					(uint8*)copyAddress + i * itemSize, itemSize) != B_OK) {
2992				kprintf("read fault");
2993				break;
2994			}
2995
2996			switch (itemSize) {
2997				case 1:
2998					kprintf(" %02" B_PRIx8, *(uint8*)&value);
2999					break;
3000				case 2:
3001					kprintf(" %04" B_PRIx16, *(uint16*)&value);
3002					break;
3003				case 4:
3004					kprintf(" %08" B_PRIx32, *(uint32*)&value);
3005					break;
3006				case 8:
3007					kprintf(" %016" B_PRIx64, *(uint64*)&value);
3008					break;
3009			}
3010		}
3011
3012		kprintf("\n");
3013	}
3014
3015	if (physical) {
3016		copyAddress = ROUNDDOWN(copyAddress, B_PAGE_SIZE);
3017		vm_put_physical_page_debug(copyAddress, physicalPageHandle);
3018	}
3019	return 0;
3020}
3021
3022
3023static void
3024dump_cache_tree_recursively(VMCache* cache, int level,
3025	VMCache* highlightCache)
3026{
3027	// print this cache
3028	for (int i = 0; i < level; i++)
3029		kprintf("  ");
3030	if (cache == highlightCache)
3031		kprintf("%p <--\n", cache);
3032	else
3033		kprintf("%p\n", cache);
3034
3035	// recursively print its consumers
3036	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3037			VMCache* consumer = it.Next();) {
3038		dump_cache_tree_recursively(consumer, level + 1, highlightCache);
3039	}
3040}
3041
3042
3043static int
3044dump_cache_tree(int argc, char** argv)
3045{
3046	if (argc != 2 || !strcmp(argv[1], "--help")) {
3047		kprintf("usage: %s <address>\n", argv[0]);
3048		return 0;
3049	}
3050
3051	addr_t address = parse_expression(argv[1]);
3052	if (address == 0)
3053		return 0;
3054
3055	VMCache* cache = (VMCache*)address;
3056	VMCache* root = cache;
3057
3058	// find the root cache (the transitive source)
3059	while (root->source != NULL)
3060		root = root->source;
3061
3062	dump_cache_tree_recursively(root, 0, cache);
3063
3064	return 0;
3065}
3066
3067
3068const char*
3069vm_cache_type_to_string(int32 type)
3070{
3071	switch (type) {
3072		case CACHE_TYPE_RAM:
3073			return "RAM";
3074		case CACHE_TYPE_DEVICE:
3075			return "device";
3076		case CACHE_TYPE_VNODE:
3077			return "vnode";
3078		case CACHE_TYPE_NULL:
3079			return "null";
3080
3081		default:
3082			return "unknown";
3083	}
3084}
3085
3086
3087#if DEBUG_CACHE_LIST
3088
3089static void
3090update_cache_info_recursively(VMCache* cache, cache_info& info)
3091{
3092	info.page_count += cache->page_count;
3093	if (cache->type == CACHE_TYPE_RAM)
3094		info.committed += cache->committed_size;
3095
3096	// recurse
3097	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3098			VMCache* consumer = it.Next();) {
3099		update_cache_info_recursively(consumer, info);
3100	}
3101}
3102
3103
3104static int
3105cache_info_compare_page_count(const void* _a, const void* _b)
3106{
3107	const cache_info* a = (const cache_info*)_a;
3108	const cache_info* b = (const cache_info*)_b;
3109	if (a->page_count == b->page_count)
3110		return 0;
3111	return a->page_count < b->page_count ? 1 : -1;
3112}
3113
3114
3115static int
3116cache_info_compare_committed(const void* _a, const void* _b)
3117{
3118	const cache_info* a = (const cache_info*)_a;
3119	const cache_info* b = (const cache_info*)_b;
3120	if (a->committed == b->committed)
3121		return 0;
3122	return a->committed < b->committed ? 1 : -1;
3123}
3124
3125
3126static void
3127dump_caches_recursively(VMCache* cache, cache_info& info, int level)
3128{
3129	for (int i = 0; i < level; i++)
3130		kprintf("  ");
3131
3132	kprintf("%p: type: %s, base: %" B_PRIdOFF ", size: %" B_PRIdOFF ", "
3133		"pages: %" B_PRIu32, cache, vm_cache_type_to_string(cache->type),
3134		cache->virtual_base, cache->virtual_end, cache->page_count);
3135
3136	if (level == 0)
3137		kprintf("/%lu", info.page_count);
3138
3139	if (cache->type == CACHE_TYPE_RAM || (level == 0 && info.committed > 0)) {
3140		kprintf(", committed: %" B_PRIdOFF, cache->committed_size);
3141
3142		if (level == 0)
3143			kprintf("/%lu", info.committed);
3144	}
3145
3146	// areas
3147	if (cache->areas != NULL) {
3148		VMArea* area = cache->areas;
3149		kprintf(", areas: %" B_PRId32 " (%s, team: %" B_PRId32 ")", area->id,
3150			area->name, area->address_space->ID());
3151
3152		while (area->cache_next != NULL) {
3153			area = area->cache_next;
3154			kprintf(", %" B_PRId32, area->id);
3155		}
3156	}
3157
3158	kputs("\n");
3159
3160	// recurse
3161	for (VMCache::ConsumerList::Iterator it = cache->consumers.GetIterator();
3162			VMCache* consumer = it.Next();) {
3163		dump_caches_recursively(consumer, info, level + 1);
3164	}
3165}
3166
3167
3168static int
3169dump_caches(int argc, char** argv)
3170{
3171	if (sCacheInfoTable == NULL) {
3172		kprintf("No cache info table!\n");
3173		return 0;
3174	}
3175
3176	bool sortByPageCount = true;
3177
3178	for (int32 i = 1; i < argc; i++) {
3179		if (strcmp(argv[i], "-c") == 0) {
3180			sortByPageCount = false;
3181		} else {
3182			print_debugger_command_usage(argv[0]);
3183			return 0;
3184		}
3185	}
3186
3187	uint32 totalCount = 0;
3188	uint32 rootCount = 0;
3189	off_t totalCommitted = 0;
3190	page_num_t totalPages = 0;
3191
3192	VMCache* cache = gDebugCacheList;
3193	while (cache) {
3194		totalCount++;
3195		if (cache->source == NULL) {
3196			cache_info stackInfo;
3197			cache_info& info = rootCount < (uint32)kCacheInfoTableCount
3198				? sCacheInfoTable[rootCount] : stackInfo;
3199			rootCount++;
3200			info.cache = cache;
3201			info.page_count = 0;
3202			info.committed = 0;
3203			update_cache_info_recursively(cache, info);
3204			totalCommitted += info.committed;
3205			totalPages += info.page_count;
3206		}
3207
3208		cache = cache->debug_next;
3209	}
3210
3211	if (rootCount <= (uint32)kCacheInfoTableCount) {
3212		qsort(sCacheInfoTable, rootCount, sizeof(cache_info),
3213			sortByPageCount
3214				? &cache_info_compare_page_count
3215				: &cache_info_compare_committed);
3216	}
3217
3218	kprintf("total committed memory: %" B_PRIdOFF ", total used pages: %"
3219		B_PRIuPHYSADDR "\n", totalCommitted, totalPages);
3220	kprintf("%" B_PRIu32 " caches (%" B_PRIu32 " root caches), sorted by %s "
3221		"per cache tree...\n\n", totalCount, rootCount, sortByPageCount ?
3222			"page count" : "committed size");
3223
3224	if (rootCount <= (uint32)kCacheInfoTableCount) {
3225		for (uint32 i = 0; i < rootCount; i++) {
3226			cache_info& info = sCacheInfoTable[i];
3227			dump_caches_recursively(info.cache, info, 0);
3228		}
3229	} else
3230		kprintf("Cache info table too small! Can't sort and print caches!\n");
3231
3232	return 0;
3233}
3234
3235#endif	// DEBUG_CACHE_LIST
3236
3237
3238static int
3239dump_cache(int argc, char** argv)
3240{
3241	VMCache* cache;
3242	bool showPages = false;
3243	int i = 1;
3244
3245	if (argc < 2 || !strcmp(argv[1], "--help")) {
3246		kprintf("usage: %s [-ps] <address>\n"
			"  if -p is specified, all pages are shown; if -s is used,\n"
			"  only the cache info is shown.\n", argv[0]);
3249		return 0;
3250	}
3251	while (argv[i][0] == '-') {
3252		char* arg = argv[i] + 1;
3253		while (arg[0]) {
3254			if (arg[0] == 'p')
3255				showPages = true;
3256			arg++;
3257		}
3258		i++;
3259	}
3260	if (argv[i] == NULL) {
3261		kprintf("%s: invalid argument, pass address\n", argv[0]);
3262		return 0;
3263	}
3264
3265	addr_t address = parse_expression(argv[i]);
3266	if (address == 0)
3267		return 0;
3268
3269	cache = (VMCache*)address;
3270
3271	cache->Dump(showPages);
3272
3273	set_debug_variable("_sourceCache", (addr_t)cache->source);
3274
3275	return 0;
3276}
3277
3278
3279static void
3280dump_area_struct(VMArea* area, bool mappings)
3281{
3282	kprintf("AREA: %p\n", area);
3283	kprintf("name:\t\t'%s'\n", area->name);
3284	kprintf("owner:\t\t0x%" B_PRIx32 "\n", area->address_space->ID());
3285	kprintf("id:\t\t0x%" B_PRIx32 "\n", area->id);
3286	kprintf("base:\t\t0x%lx\n", area->Base());
3287	kprintf("size:\t\t0x%lx\n", area->Size());
3288	kprintf("protection:\t0x%" B_PRIx32 "\n", area->protection);
3289	kprintf("wiring:\t\t0x%x\n", area->wiring);
3290	kprintf("memory_type:\t%#" B_PRIx32 "\n", area->MemoryType());
3291	kprintf("cache:\t\t%p\n", area->cache);
3292	kprintf("cache_type:\t%s\n", vm_cache_type_to_string(area->cache_type));
3293	kprintf("cache_offset:\t0x%" B_PRIx64 "\n", area->cache_offset);
3294	kprintf("cache_next:\t%p\n", area->cache_next);
3295	kprintf("cache_prev:\t%p\n", area->cache_prev);
3296
3297	VMAreaMappings::Iterator iterator = area->mappings.GetIterator();
3298	if (mappings) {
3299		kprintf("page mappings:\n");
3300		while (iterator.HasNext()) {
3301			vm_page_mapping* mapping = iterator.Next();
3302			kprintf("  %p", mapping->page);
3303		}
3304		kprintf("\n");
3305	} else {
3306		uint32 count = 0;
3307		while (iterator.Next() != NULL) {
3308			count++;
3309		}
3310		kprintf("page mappings:\t%" B_PRIu32 "\n", count);
3311	}
3312}
3313
3314
3315static int
3316dump_area(int argc, char** argv)
3317{
3318	bool mappings = false;
3319	bool found = false;
3320	int32 index = 1;
3321	VMArea* area;
3322	addr_t num;
3323
3324	if (argc < 2 || !strcmp(argv[1], "--help")) {
3325		kprintf("usage: area [-m] [id|contains|address|name] <id|address|name>\n"
3326			"All areas matching either id/address/name are listed. You can\n"
			"restrict the check to a specific attribute by prefixing the\n"
			"specifier with the id/contains/address/name keywords.\n"
3329			"-m shows the area's mappings as well.\n");
3330		return 0;
3331	}
3332
3333	if (!strcmp(argv[1], "-m")) {
3334		mappings = true;
3335		index++;
3336	}
3337
3338	int32 mode = 0xf;
3339	if (!strcmp(argv[index], "id"))
3340		mode = 1;
3341	else if (!strcmp(argv[index], "contains"))
3342		mode = 2;
3343	else if (!strcmp(argv[index], "name"))
3344		mode = 4;
3345	else if (!strcmp(argv[index], "address"))
3346		mode = 0;
3347	if (mode != 0xf)
3348		index++;
3349
3350	if (index >= argc) {
3351		kprintf("No area specifier given.\n");
3352		return 0;
3353	}
3354
3355	num = parse_expression(argv[index]);
3356
3357	if (mode == 0) {
3358		dump_area_struct((struct VMArea*)num, mappings);
3359	} else {
3360		// walk through the area list, looking for the arguments as a name
3361
3362		VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3363		while ((area = it.Next()) != NULL) {
3364			if (((mode & 4) != 0 && area->name != NULL
3365					&& !strcmp(argv[index], area->name))
3366				|| (num != 0 && (((mode & 1) != 0 && (addr_t)area->id == num)
3367					|| (((mode & 2) != 0 && area->Base() <= num
3368						&& area->Base() + area->Size() > num))))) {
3369				dump_area_struct(area, mappings);
3370				found = true;
3371			}
3372		}
3373
3374		if (!found)
3375			kprintf("could not find area %s (%ld)\n", argv[index], num);
3376	}
3377
3378	return 0;
3379}
3380
3381
3382static int
3383dump_area_list(int argc, char** argv)
3384{
3385	VMArea* area;
3386	const char* name = NULL;
3387	int32 id = 0;
3388
3389	if (argc > 1) {
3390		id = parse_expression(argv[1]);
3391		if (id == 0)
3392			name = argv[1];
3393	}
3394
3395	kprintf("%-*s      id  %-*s    %-*sprotect lock  name\n",
3396		B_PRINTF_POINTER_WIDTH, "addr", B_PRINTF_POINTER_WIDTH, "base",
3397		B_PRINTF_POINTER_WIDTH, "size");
3398
3399	VMAreaHashTable::Iterator it = VMAreaHash::GetIterator();
3400	while ((area = it.Next()) != NULL) {
3401		if ((id != 0 && area->address_space->ID() != id)
3402			|| (name != NULL && strstr(area->name, name) == NULL))
3403			continue;
3404
3405		kprintf("%p %5" B_PRIx32 "  %p  %p %4" B_PRIx32 " %4d  %s\n", area,
3406			area->id, (void*)area->Base(), (void*)area->Size(),
3407			area->protection, area->wiring, area->name);
3408	}
3409	return 0;
3410}
3411
3412
3413static int
3414dump_available_memory(int argc, char** argv)
3415{
3416	kprintf("Available memory: %" B_PRIdOFF "/%" B_PRIuPHYSADDR " bytes\n",
3417		sAvailableMemory, (phys_addr_t)vm_page_num_pages() * B_PAGE_SIZE);
3418	return 0;
3419}
3420
3421
3422static int
3423dump_mapping_info(int argc, char** argv)
3424{
3425	bool reverseLookup = false;
3426	bool pageLookup = false;
3427
3428	int argi = 1;
3429	for (; argi < argc && argv[argi][0] == '-'; argi++) {
3430		const char* arg = argv[argi];
3431		if (strcmp(arg, "-r") == 0) {
3432			reverseLookup = true;
3433		} else if (strcmp(arg, "-p") == 0) {
3434			reverseLookup = true;
3435			pageLookup = true;
3436		} else {
3437			print_debugger_command_usage(argv[0]);
3438			return 0;
3439		}
3440	}
3441
3442	// We need at least one argument, the address. Optionally a thread ID can be
3443	// specified.
3444	if (argi >= argc || argi + 2 < argc) {
3445		print_debugger_command_usage(argv[0]);
3446		return 0;
3447	}
3448
3449	uint64 addressValue;
3450	if (!evaluate_debug_expression(argv[argi++], &addressValue, false))
3451		return 0;
3452
3453	Team* team = NULL;
3454	if (argi < argc) {
3455		uint64 threadID;
3456		if (!evaluate_debug_expression(argv[argi++], &threadID, false))
3457			return 0;
3458
3459		Thread* thread = Thread::GetDebug(threadID);
3460		if (thread == NULL) {
3461			kprintf("Invalid thread/team ID \"%s\"\n", argv[argi - 1]);
3462			return 0;
3463		}
3464
3465		team = thread->team;
3466	}
3467
3468	if (reverseLookup) {
3469		phys_addr_t physicalAddress;
3470		if (pageLookup) {
3471			vm_page* page = (vm_page*)(addr_t)addressValue;
3472			physicalAddress = page->physical_page_number * B_PAGE_SIZE;
3473		} else {
3474			physicalAddress = (phys_addr_t)addressValue;
3475			physicalAddress -= physicalAddress % B_PAGE_SIZE;
3476		}
3477
3478		kprintf("    Team     Virtual Address      Area\n");
3479		kprintf("--------------------------------------\n");
3480
3481		struct Callback : VMTranslationMap::ReverseMappingInfoCallback {
3482			Callback()
3483				:
3484				fAddressSpace(NULL)
3485			{
3486			}
3487
3488			void SetAddressSpace(VMAddressSpace* addressSpace)
3489			{
3490				fAddressSpace = addressSpace;
3491			}
3492
3493			virtual bool HandleVirtualAddress(addr_t virtualAddress)
3494			{
3495				kprintf("%8" B_PRId32 "  %#18" B_PRIxADDR, fAddressSpace->ID(),
3496					virtualAddress);
3497				if (VMArea* area = fAddressSpace->LookupArea(virtualAddress))
3498					kprintf("  %8" B_PRId32 " %s\n", area->id, area->name);
3499				else
3500					kprintf("\n");
3501				return false;
3502			}
3503
3504		private:
3505			VMAddressSpace*	fAddressSpace;
3506		} callback;
3507
3508		if (team != NULL) {
3509			// team specified -- get its address space
3510			VMAddressSpace* addressSpace = team->address_space;
3511			if (addressSpace == NULL) {
3512				kprintf("Failed to get address space!\n");
3513				return 0;
3514			}
3515
3516			callback.SetAddressSpace(addressSpace);
3517			addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3518				physicalAddress, callback);
3519		} else {
3520			// no team specified -- iterate through all address spaces
3521			for (VMAddressSpace* addressSpace = VMAddressSpace::DebugFirst();
3522				addressSpace != NULL;
3523				addressSpace = VMAddressSpace::DebugNext(addressSpace)) {
3524				callback.SetAddressSpace(addressSpace);
3525				addressSpace->TranslationMap()->DebugGetReverseMappingInfo(
3526					physicalAddress, callback);
3527			}
3528		}
3529	} else {
3530		// get the address space
3531		addr_t virtualAddress = (addr_t)addressValue;
3532		virtualAddress -= virtualAddress % B_PAGE_SIZE;
3533		VMAddressSpace* addressSpace;
3534		if (IS_KERNEL_ADDRESS(virtualAddress)) {
3535			addressSpace = VMAddressSpace::Kernel();
3536		} else if (team != NULL) {
3537			addressSpace = team->address_space;
3538		} else {
3539			Thread* thread = debug_get_debugged_thread();
3540			if (thread == NULL || thread->team == NULL) {
3541				kprintf("Failed to get team!\n");
3542				return 0;
3543			}
3544
3545			addressSpace = thread->team->address_space;
3546		}
3547
3548		if (addressSpace == NULL) {
3549			kprintf("Failed to get address space!\n");
3550			return 0;
3551		}
3552
3553		// let the translation map implementation do the job
3554		addressSpace->TranslationMap()->DebugPrintMappingInfo(virtualAddress);
3555	}
3556
3557	return 0;
3558}
3559
3560
3561/*!	Deletes all areas and reserved regions in the given address space.
3562
3563	The caller must ensure that none of the areas has any wired ranges.
3564
3565	\param addressSpace The address space.
3566	\param deletingAddressSpace \c true, if the address space is in the process
3567		of being deleted.
3568*/
3569void
3570vm_delete_areas(struct VMAddressSpace* addressSpace, bool deletingAddressSpace)
3571{
3572	TRACE(("vm_delete_areas: called on address space 0x%" B_PRIx32 "\n",
3573		addressSpace->ID()));
3574
3575	addressSpace->WriteLock();
3576
3577	// remove all reserved areas in this address space
3578	addressSpace->UnreserveAllAddressRanges(0);
3579
3580	// delete all the areas in this address space
3581	while (VMArea* area = addressSpace->FirstArea()) {
3582		ASSERT(!area->IsWired());
3583		delete_area(addressSpace, area, deletingAddressSpace);
3584	}
3585
3586	addressSpace->WriteUnlock();
3587}
3588
3589
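/*!	Returns the ID of the area containing \a address, consulting the current
	team's address space for userland addresses and the kernel address space
	otherwise. Unless \a kernel is \c true, only areas that are accessible
	from userland (B_READ_AREA/B_WRITE_AREA) are reported.
*/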
3590static area_id
3591vm_area_for(addr_t address, bool kernel)
3592{
3593	team_id team;
3594	if (IS_USER_ADDRESS(address)) {
3595		// we try the user team address space, if any
3596		team = VMAddressSpace::CurrentID();
3597		if (team < 0)
3598			return team;
3599	} else
3600		team = VMAddressSpace::KernelID();
3601
3602	AddressSpaceReadLocker locker(team);
3603	if (!locker.IsLocked())
3604		return B_BAD_TEAM_ID;
3605
3606	VMArea* area = locker.AddressSpace()->LookupArea(address);
3607	if (area != NULL) {
3608		if (!kernel && (area->protection & (B_READ_AREA | B_WRITE_AREA)) == 0)
3609			return B_ERROR;
3610
3611		return area->id;
3612	}
3613
3614	return B_ERROR;
3615}
3616
3617
3618/*!	Frees physical pages that were used during the boot process.
3619	\a end is inclusive.
3620*/
3621static void
3622unmap_and_free_physical_pages(VMTranslationMap* map, addr_t start, addr_t end)
3623{
3624	// free all physical pages in the specified range
3625
3626	for (addr_t current = start; current < end; current += B_PAGE_SIZE) {
3627		phys_addr_t physicalAddress;
3628		uint32 flags;
3629
3630		if (map->Query(current, &physicalAddress, &flags) == B_OK
3631			&& (flags & PAGE_PRESENT) != 0) {
3632			vm_page* page = vm_lookup_page(physicalAddress / B_PAGE_SIZE);
3633			if (page != NULL && page->State() != PAGE_STATE_FREE
3634					 && page->State() != PAGE_STATE_CLEAR
3635					 && page->State() != PAGE_STATE_UNUSED) {
3636				DEBUG_PAGE_ACCESS_START(page);
3637				vm_page_set_state(page, PAGE_STATE_FREE);
3638			}
3639		}
3640	}
3641
3642	// unmap the memory
3643	map->Unmap(start, end);
3644}
3645
3646
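/*!	Frees and unmaps all physical pages in the given kernel virtual range that
	are no longer covered by any kernel area, i.e. memory only the boot loader
	needed.
*/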
3647void
3648vm_free_unused_boot_loader_range(addr_t start, addr_t size)
3649{
3650	VMTranslationMap* map = VMAddressSpace::Kernel()->TranslationMap();
3651	addr_t end = start + (size - 1);
3652	addr_t lastEnd = start;
3653
3654	TRACE(("vm_free_unused_boot_loader_range(): asked to free %p - %p\n",
3655		(void*)start, (void*)end));
3656
3657	// The areas are sorted in virtual address space order, so
3658	// we just have to find the holes between them that fall
	// into the range we should dispose of.
3660
3661	map->Lock();
3662
3663	for (VMAddressSpace::AreaIterator it
3664				= VMAddressSpace::Kernel()->GetAreaIterator();
3665			VMArea* area = it.Next();) {
3666		addr_t areaStart = area->Base();
3667		addr_t areaEnd = areaStart + (area->Size() - 1);
3668
3669		if (areaEnd < start)
3670			continue;
3671
3672		if (areaStart > end) {
			// we are done; the area is already beyond what we have to free
3674			break;
3675		}
3676
3677		if (areaStart > lastEnd) {
3678			// this is something we can free
3679			TRACE(("free boot range: get rid of %p - %p\n", (void*)lastEnd,
3680				(void*)areaStart));
3681			unmap_and_free_physical_pages(map, lastEnd, areaStart - 1);
3682		}
3683
3684		if (areaEnd >= end) {
3685			lastEnd = areaEnd;
3686				// no +1 to prevent potential overflow
3687			break;
3688		}
3689
3690		lastEnd = areaEnd + 1;
3691	}
3692
3693	if (lastEnd < end) {
3694		// we can also get rid of some space at the end of the area
3695		TRACE(("free boot range: also remove %p - %p\n", (void*)lastEnd,
3696			(void*)end));
3697		unmap_and_free_physical_pages(map, lastEnd, end);
3698	}
3699
3700	map->Unlock();
3701}
3702
3703
3704static void
3705create_preloaded_image_areas(struct preloaded_image* _image)
3706{
3707	preloaded_elf_image* image = static_cast<preloaded_elf_image*>(_image);
3708	char name[B_OS_NAME_LENGTH];
3709	void* address;
3710	int32 length;
3711
3712	// use file name to create a good area name
3713	char* fileName = strrchr(image->name, '/');
3714	if (fileName == NULL)
3715		fileName = image->name;
3716	else
3717		fileName++;
3718
3719	length = strlen(fileName);
3720	// make sure there is enough space for the suffix
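	// (B_OS_NAME_LENGTH is 32, so 25 characters plus the 5-character
	// "_text"/"_data" suffix and the terminating NUL still fit.)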
3721	if (length > 25)
3722		length = 25;
3723
3724	memcpy(name, fileName, length);
3725	strcpy(name + length, "_text");
3726	address = (void*)ROUNDDOWN(image->text_region.start, B_PAGE_SIZE);
3727	image->text_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3728		PAGE_ALIGN(image->text_region.size), B_ALREADY_WIRED,
3729		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3730		// this will later be remapped read-only/executable by the
3731		// ELF initialization code
3732
3733	strcpy(name + length, "_data");
3734	address = (void*)ROUNDDOWN(image->data_region.start, B_PAGE_SIZE);
3735	image->data_region.id = create_area(name, &address, B_EXACT_ADDRESS,
3736		PAGE_ALIGN(image->data_region.size), B_ALREADY_WIRED,
3737		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3738}
3739
3740
/*!	Frees all areas previously created for the kernel arguments ranges in the
	kernel_args structure. Any boot loader resources contained in those
	arguments must not be accessed anymore past this point.
3744*/
3745void
3746vm_free_kernel_args(kernel_args* args)
3747{
3748	uint32 i;
3749
3750	TRACE(("vm_free_kernel_args()\n"));
3751
3752	for (i = 0; i < args->num_kernel_args_ranges; i++) {
3753		area_id area = area_for((void*)(addr_t)args->kernel_args_range[i].start);
3754		if (area >= B_OK)
3755			delete_area(area);
3756	}
3757}
3758
3759
3760static void
3761allocate_kernel_args(kernel_args* args)
3762{
3763	TRACE(("allocate_kernel_args()\n"));
3764
3765	for (uint32 i = 0; i < args->num_kernel_args_ranges; i++) {
3766		void* address = (void*)(addr_t)args->kernel_args_range[i].start;
3767
3768		create_area("_kernel args_", &address, B_EXACT_ADDRESS,
3769			args->kernel_args_range[i].size, B_ALREADY_WIRED,
3770			B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
3771	}
3772}
3773
3774
3775static void
3776unreserve_boot_loader_ranges(kernel_args* args)
3777{
3778	TRACE(("unreserve_boot_loader_ranges()\n"));
3779
3780	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3781		vm_unreserve_address_range(VMAddressSpace::KernelID(),
3782			(void*)(addr_t)args->virtual_allocated_range[i].start,
3783			args->virtual_allocated_range[i].size);
3784	}
3785}
3786
3787
3788static void
3789reserve_boot_loader_ranges(kernel_args* args)
3790{
3791	TRACE(("reserve_boot_loader_ranges()\n"));
3792
3793	for (uint32 i = 0; i < args->num_virtual_allocated_ranges; i++) {
3794		void* address = (void*)(addr_t)args->virtual_allocated_range[i].start;
3795
		// If the address is not a kernel address, we just skip it. The
		// architecture-specific code has to deal with it.
3798		if (!IS_KERNEL_ADDRESS(address)) {
3799			dprintf("reserve_boot_loader_ranges(): Skipping range: %p, %"
3800				B_PRIu64 "\n", address, args->virtual_allocated_range[i].size);
3801			continue;
3802		}
3803
3804		status_t status = vm_reserve_address_range(VMAddressSpace::KernelID(),
3805			&address, B_EXACT_ADDRESS, args->virtual_allocated_range[i].size, 0);
3806		if (status < B_OK)
3807			panic("could not reserve boot loader ranges\n");
3808	}
3809}
3810
3811
3812static addr_t
3813allocate_early_virtual(kernel_args* args, size_t size, addr_t alignment)
3814{
3815	size = PAGE_ALIGN(size);
3816
3817	// find a slot in the virtual allocation addr range
3818	for (uint32 i = 1; i < args->num_virtual_allocated_ranges; i++) {
3819		// check to see if the space between this one and the last is big enough
3820		addr_t rangeStart = args->virtual_allocated_range[i].start;
3821		addr_t previousRangeEnd = args->virtual_allocated_range[i - 1].start
3822			+ args->virtual_allocated_range[i - 1].size;
3823
3824		addr_t base = alignment > 0
3825			? ROUNDUP(previousRangeEnd, alignment) : previousRangeEnd;
3826
3827		if (base >= KERNEL_BASE && base < rangeStart
3828				&& rangeStart - base >= size) {
3829			args->virtual_allocated_range[i - 1].size
3830				+= base + size - previousRangeEnd;
3831			return base;
3832		}
3833	}
3834
	// we didn't find a gap between the existing allocation ranges. This is OK;
	// see if there's a gap after the last one.
3837	int lastEntryIndex = args->num_virtual_allocated_ranges - 1;
3838	addr_t lastRangeEnd = args->virtual_allocated_range[lastEntryIndex].start
3839		+ args->virtual_allocated_range[lastEntryIndex].size;
3840	addr_t base = alignment > 0
3841		? ROUNDUP(lastRangeEnd, alignment) : lastRangeEnd;
3842	if (KERNEL_BASE + (KERNEL_SIZE - 1) - base >= size) {
3843		args->virtual_allocated_range[lastEntryIndex].size
3844			+= base + size - lastRangeEnd;
3845		return base;
3846	}
3847
3848	// see if there's a gap before the first one
3849	addr_t rangeStart = args->virtual_allocated_range[0].start;
3850	if (rangeStart > KERNEL_BASE && rangeStart - KERNEL_BASE >= size) {
3851		base = rangeStart - size;
3852		if (alignment > 0)
3853			base = ROUNDDOWN(base, alignment);
3854
3855		if (base >= KERNEL_BASE) {
3856			args->virtual_allocated_range[0].start = base;
3857			args->virtual_allocated_range[0].size += rangeStart - base;
3858			return base;
3859		}
3860	}
3861
3862	return 0;
3863}
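
/*	Illustrative example (the numbers are hypothetical and merely assume that
	KERNEL_BASE is at or below 0x80000000): given the already allocated virtual
	ranges [0x80000000, 0x80040000) and [0x80100000, 0x80180000), a request for
	0x20000 bytes with an alignment of 0x10000 is satisfied at 0x80040000 --
	the gap between the two ranges -- and the first range is extended to end
	at 0x80060000 so that it covers the new allocation.
*/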
3864
3865
3866static bool
3867is_page_in_physical_memory_range(kernel_args* args, phys_addr_t address)
3868{
3869	// TODO: horrible brute-force method of determining if the page can be
3870	// allocated
3871	for (uint32 i = 0; i < args->num_physical_memory_ranges; i++) {
3872		if (address >= args->physical_memory_range[i].start
3873			&& address < args->physical_memory_range[i].start
3874				+ args->physical_memory_range[i].size)
3875			return true;
3876	}
3877	return false;
3878}
3879
3880
3881page_num_t
3882vm_allocate_early_physical_page(kernel_args* args)
3883{
3884	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3885		phys_addr_t nextPage;
3886
3887		nextPage = args->physical_allocated_range[i].start
3888			+ args->physical_allocated_range[i].size;
		// see if the page right after this allocated paddr run can be allocated
3890		if (i + 1 < args->num_physical_allocated_ranges
3891			&& args->physical_allocated_range[i + 1].size != 0) {
3892			// see if the next page will collide with the next allocated range
3893			if (nextPage >= args->physical_allocated_range[i+1].start)
3894				continue;
3895		}
3896		// see if the next physical page fits in the memory block
3897		if (is_page_in_physical_memory_range(args, nextPage)) {
3898			// we got one!
3899			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3900			return nextPage / B_PAGE_SIZE;
3901		}
3902	}
3903
3904	// Expanding upwards didn't work, try going downwards.
3905	for (uint32 i = 0; i < args->num_physical_allocated_ranges; i++) {
3906		phys_addr_t nextPage;
3907
3908		nextPage = args->physical_allocated_range[i].start - B_PAGE_SIZE;
		// see if the page right before this allocated paddr run can be allocated
3910		if (i > 0 && args->physical_allocated_range[i - 1].size != 0) {
			// see if that page would collide with the previous allocated range
3912			if (nextPage < args->physical_allocated_range[i-1].start
3913				+ args->physical_allocated_range[i-1].size)
3914				continue;
3915		}
3916		// see if the next physical page fits in the memory block
3917		if (is_page_in_physical_memory_range(args, nextPage)) {
3918			// we got one!
3919			args->physical_allocated_range[i].start -= B_PAGE_SIZE;
3920			args->physical_allocated_range[i].size += B_PAGE_SIZE;
3921			return nextPage / B_PAGE_SIZE;
3922		}
3923	}
3924
3925	return 0;
		// could not allocate a page
3927}
3928
3929
3930/*!	This one uses the kernel_args' physical and virtual memory ranges to
3931	allocate some pages before the VM is completely up.
3932*/
3933addr_t
3934vm_allocate_early(kernel_args* args, size_t virtualSize, size_t physicalSize,
3935	uint32 attributes, addr_t alignment)
3936{
3937	if (physicalSize > virtualSize)
3938		physicalSize = virtualSize;
3939
3940	// find the vaddr to allocate at
3941	addr_t virtualBase = allocate_early_virtual(args, virtualSize, alignment);
3942	//dprintf("vm_allocate_early: vaddr 0x%lx\n", virtualBase);
3943	if (virtualBase == 0) {
3944		panic("vm_allocate_early: could not allocate virtual address\n");
3945		return 0;
3946	}
3947
3948	// map the pages
3949	for (uint32 i = 0; i < PAGE_ALIGN(physicalSize) / B_PAGE_SIZE; i++) {
3950		page_num_t physicalAddress = vm_allocate_early_physical_page(args);
3951		if (physicalAddress == 0)
3952			panic("error allocating early page!\n");
3953
3954		//dprintf("vm_allocate_early: paddr 0x%lx\n", physicalAddress);
3955
3956		arch_vm_translation_map_early_map(args, virtualBase + i * B_PAGE_SIZE,
3957			physicalAddress * B_PAGE_SIZE, attributes,
3958			&vm_allocate_early_physical_page);
3959	}
3960
3961	return virtualBase;
3962}
3963
3964
/*!	The main entry point to initialize the VM. */
3966status_t
3967vm_init(kernel_args* args)
3968{
3969	struct preloaded_image* image;
3970	void* address;
3971	status_t err = 0;
3972	uint32 i;
3973
3974	TRACE(("vm_init: entry\n"));
3975	err = arch_vm_translation_map_init(args, &sPhysicalPageMapper);
3976	err = arch_vm_init(args);
3977
3978	// initialize some globals
3979	vm_page_init_num_pages(args);
3980	sAvailableMemory = vm_page_num_pages() * B_PAGE_SIZE;
3981
3982	slab_init(args);
3983
3984#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
3985	off_t heapSize = INITIAL_HEAP_SIZE;
	// try to accommodate low memory systems
3987	while (heapSize > sAvailableMemory / 8)
3988		heapSize /= 2;
3989	if (heapSize < 1024 * 1024)
3990		panic("vm_init: go buy some RAM please.");
3991
3992	// map in the new heap and initialize it
3993	addr_t heapBase = vm_allocate_early(args, heapSize, heapSize,
3994		B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, 0);
3995	TRACE(("heap at 0x%lx\n", heapBase));
3996	heap_init(heapBase, heapSize);
3997#endif
3998
3999	// initialize the free page list and physical page mapper
4000	vm_page_init(args);
4001
4002	// initialize the cache allocators
4003	vm_cache_init(args);
4004
4005	{
4006		status_t error = VMAreaHash::Init();
4007		if (error != B_OK)
4008			panic("vm_init: error initializing area hash table\n");
4009	}
4010
4011	VMAddressSpace::Init();
4012	reserve_boot_loader_ranges(args);
4013
4014#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4015	heap_init_post_area();
4016#endif
4017
	// Do any further initialization that the architecture-dependent layers may
	// need now.
4020	arch_vm_translation_map_init_post_area(args);
4021	arch_vm_init_post_area(args);
4022	vm_page_init_post_area(args);
4023	slab_init_post_area();
4024
4025	// allocate areas to represent stuff that already exists
4026
4027#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4028	address = (void*)ROUNDDOWN(heapBase, B_PAGE_SIZE);
4029	create_area("kernel heap", &address, B_EXACT_ADDRESS, heapSize,
4030		B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4031#endif
4032
4033	allocate_kernel_args(args);
4034
4035	create_preloaded_image_areas(args->kernel_image);
4036
4037	// allocate areas for preloaded images
4038	for (image = args->preloaded_images; image != NULL; image = image->next)
4039		create_preloaded_image_areas(image);
4040
4041	// allocate kernel stacks
4042	for (i = 0; i < args->num_cpus; i++) {
4043		char name[64];
4044
4045		sprintf(name, "idle thread %" B_PRIu32 " kstack", i + 1);
4046		address = (void*)args->cpu_kstack[i].start;
4047		create_area(name, &address, B_EXACT_ADDRESS, args->cpu_kstack[i].size,
4048			B_ALREADY_WIRED, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA);
4049	}
4050
4051	void* lastPage = (void*)ROUNDDOWN(~(addr_t)0, B_PAGE_SIZE);
4052	vm_block_address_range("overflow protection", lastPage, B_PAGE_SIZE);
4053
4054#if PARANOID_KERNEL_MALLOC
4055	vm_block_address_range("uninitialized heap memory",
4056		(void *)ROUNDDOWN(0xcccccccc, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4057#endif
4058#if PARANOID_KERNEL_FREE
4059	vm_block_address_range("freed heap memory",
4060		(void *)ROUNDDOWN(0xdeadbeef, B_PAGE_SIZE), B_PAGE_SIZE * 64);
4061#endif
4062
4063	// create the object cache for the page mappings
4064	gPageMappingsObjectCache = create_object_cache_etc("page mappings",
4065		sizeof(vm_page_mapping), 0, 0, 64, 128, CACHE_LARGE_SLAB, NULL, NULL,
4066		NULL, NULL);
4067	if (gPageMappingsObjectCache == NULL)
4068		panic("failed to create page mappings object cache");
4069
4070	object_cache_set_minimum_reserve(gPageMappingsObjectCache, 1024);
4071
4072#if DEBUG_CACHE_LIST
4073	if (vm_page_num_free_pages() >= 200 * 1024 * 1024 / B_PAGE_SIZE) {
4074		virtual_address_restrictions virtualRestrictions = {};
4075		virtualRestrictions.address_specification = B_ANY_KERNEL_ADDRESS;
4076		physical_address_restrictions physicalRestrictions = {};
4077		create_area_etc(VMAddressSpace::KernelID(), "cache info table",
4078			ROUNDUP(kCacheInfoTableCount * sizeof(cache_info), B_PAGE_SIZE),
4079			B_FULL_LOCK, B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA,
4080			CREATE_AREA_DONT_WAIT, 0, &virtualRestrictions,
4081			&physicalRestrictions, (void**)&sCacheInfoTable);
4082	}
4083#endif	// DEBUG_CACHE_LIST
4084
4085	// add some debugger commands
4086	add_debugger_command("areas", &dump_area_list, "Dump a list of all areas");
4087	add_debugger_command("area", &dump_area,
4088		"Dump info about a particular area");
4089	add_debugger_command("cache", &dump_cache, "Dump VMCache");
4090	add_debugger_command("cache_tree", &dump_cache_tree, "Dump VMCache tree");
4091#if DEBUG_CACHE_LIST
4092	if (sCacheInfoTable != NULL) {
4093		add_debugger_command_etc("caches", &dump_caches,
4094			"List all VMCache trees",
4095			"[ \"-c\" ]\n"
4096			"All cache trees are listed sorted in decreasing order by number "
4097				"of\n"
4098			"used pages or, if \"-c\" is specified, by size of committed "
4099				"memory.\n",
4100			0);
4101	}
4102#endif
4103	add_debugger_command("avail", &dump_available_memory,
4104		"Dump available memory");
4105	add_debugger_command("dl", &display_mem, "dump memory long words (64-bit)");
4106	add_debugger_command("dw", &display_mem, "dump memory words (32-bit)");
4107	add_debugger_command("ds", &display_mem, "dump memory shorts (16-bit)");
4108	add_debugger_command("db", &display_mem, "dump memory bytes (8-bit)");
4109	add_debugger_command("string", &display_mem, "dump strings");
4110
4111	add_debugger_command_etc("mapping", &dump_mapping_info,
4112		"Print address mapping information",
4113		"[ \"-r\" | \"-p\" ] <address> [ <thread ID> ]\n"
4114		"Prints low-level page mapping information for a given address. If\n"
4115		"neither \"-r\" nor \"-p\" are specified, <address> is a virtual\n"
4116		"address that is looked up in the translation map of the current\n"
4117		"team, respectively the team specified by thread ID <thread ID>. If\n"
4118		"\"-r\" is specified, <address> is a physical address that is\n"
4119		"searched in the translation map of all teams, respectively the team\n"
4120		"specified by thread ID <thread ID>. If \"-p\" is specified,\n"
4121		"<address> is the address of a vm_page structure. The behavior is\n"
4122		"equivalent to specifying \"-r\" with the physical address of that\n"
4123		"page.\n",
4124		0);
4125
4126	TRACE(("vm_init: exit\n"));
4127
4128	vm_cache_init_post_heap();
4129
4130	return err;
4131}
4132
4133
4134status_t
4135vm_init_post_sem(kernel_args* args)
4136{
	// This frees all unused boot loader resources and makes their space
	// available again.
4139	arch_vm_init_end(args);
4140	unreserve_boot_loader_ranges(args);
4141
	// Fill in all of the semaphores that were not allocated before. Since
	// we're still single-threaded and only the kernel address space exists,
	// it isn't hard to find all of the ones we need to create.
4145
4146	arch_vm_translation_map_init_post_sem(args);
4147
4148	slab_init_post_sem();
4149
4150#if USE_DEBUG_HEAP_FOR_MALLOC || USE_GUARDED_HEAP_FOR_MALLOC
4151	heap_init_post_sem();
4152#endif
4153
4154	return B_OK;
4155}
4156
4157
4158status_t
4159vm_init_post_thread(kernel_args* args)
4160{
4161	vm_page_init_post_thread(args);
4162	slab_init_post_thread();
4163	return heap_init_post_thread();
4164}
4165
4166
4167status_t
4168vm_init_post_modules(kernel_args* args)
4169{
4170	return arch_vm_init_post_modules(args);
4171}
4172
4173
4174void
4175permit_page_faults(void)
4176{
4177	Thread* thread = thread_get_current_thread();
4178	if (thread != NULL)
4179		atomic_add(&thread->page_faults_allowed, 1);
4180}
4181
4182
4183void
4184forbid_page_faults(void)
4185{
4186	Thread* thread = thread_get_current_thread();
4187	if (thread != NULL)
4188		atomic_add(&thread->page_faults_allowed, -1);
4189}
4190
4191
4192status_t
4193vm_page_fault(addr_t address, addr_t faultAddress, bool isWrite, bool isExecute,
4194	bool isUser, addr_t* newIP)
4195{
4196	FTRACE(("vm_page_fault: page fault at 0x%lx, ip 0x%lx\n", address,
4197		faultAddress));
4198
4199	TPF(PageFaultStart(address, isWrite, isUser, faultAddress));
4200
4201	addr_t pageAddress = ROUNDDOWN(address, B_PAGE_SIZE);
4202	VMAddressSpace* addressSpace = NULL;
4203
4204	status_t status = B_OK;
4205	*newIP = 0;
4206	atomic_add((int32*)&sPageFaults, 1);
4207
4208	if (IS_KERNEL_ADDRESS(pageAddress)) {
4209		addressSpace = VMAddressSpace::GetKernel();
4210	} else if (IS_USER_ADDRESS(pageAddress)) {
4211		addressSpace = VMAddressSpace::GetCurrent();
4212		if (addressSpace == NULL) {
4213			if (!isUser) {
4214				dprintf("vm_page_fault: kernel thread accessing invalid user "
4215					"memory!\n");
4216				status = B_BAD_ADDRESS;
4217				TPF(PageFaultError(-1,
4218					VMPageFaultTracing
4219						::PAGE_FAULT_ERROR_KERNEL_BAD_USER_MEMORY));
4220			} else {
4221				// XXX weird state.
4222				panic("vm_page_fault: non kernel thread accessing user memory "
4223					"that doesn't exist!\n");
4224				status = B_BAD_ADDRESS;
4225			}
4226		}
4227	} else {
4228		// the hit was probably in the 64k DMZ between kernel and user space
4229		// this keeps a user space thread from passing a buffer that crosses
4230		// into kernel space
4231		status = B_BAD_ADDRESS;
4232		TPF(PageFaultError(-1,
4233			VMPageFaultTracing::PAGE_FAULT_ERROR_NO_ADDRESS_SPACE));
4234	}
4235
4236	if (status == B_OK) {
4237		status = vm_soft_fault(addressSpace, pageAddress, isWrite, isExecute,
4238			isUser, NULL);
4239	}
4240
4241	if (status < B_OK) {
4242		dprintf("vm_page_fault: vm_soft_fault returned error '%s' on fault at "
4243			"0x%lx, ip 0x%lx, write %d, user %d, thread 0x%" B_PRIx32 "\n",
4244			strerror(status), address, faultAddress, isWrite, isUser,
4245			thread_get_current_thread_id());
4246		if (!isUser) {
4247			Thread* thread = thread_get_current_thread();
4248			if (thread != NULL && thread->fault_handler != 0) {
				// this will cause the arch-dependent page fault handler to
4250				// modify the IP on the interrupt frame or whatever to return
4251				// to this address
4252				*newIP = reinterpret_cast<uintptr_t>(thread->fault_handler);
4253			} else {
4254				// unhandled page fault in the kernel
4255				panic("vm_page_fault: unhandled page fault in kernel space at "
4256					"0x%lx, ip 0x%lx\n", address, faultAddress);
4257			}
4258		} else {
4259			Thread* thread = thread_get_current_thread();
4260
4261#ifdef TRACE_FAULTS
4262			VMArea* area = NULL;
4263			if (addressSpace != NULL) {
4264				addressSpace->ReadLock();
4265				area = addressSpace->LookupArea(faultAddress);
4266			}
4267
4268			dprintf("vm_page_fault: thread \"%s\" (%" B_PRId32 ") in team "
4269				"\"%s\" (%" B_PRId32 ") tried to %s address %#lx, ip %#lx "
4270				"(\"%s\" +%#lx)\n", thread->name, thread->id,
4271				thread->team->Name(), thread->team->id,
4272				isWrite ? "write" : (isExecute ? "execute" : "read"), address,
4273				faultAddress, area ? area->name : "???", faultAddress - (area ?
4274					area->Base() : 0x0));
4275
4276			if (addressSpace != NULL)
4277				addressSpace->ReadUnlock();
4278#endif
4279
4280			// If the thread has a signal handler for SIGSEGV, we simply
4281			// send it the signal. Otherwise we notify the user debugger
4282			// first.
4283			struct sigaction action;
4284			if ((sigaction(SIGSEGV, NULL, &action) == 0
4285					&& action.sa_handler != SIG_DFL
4286					&& action.sa_handler != SIG_IGN)
4287				|| user_debug_exception_occurred(B_SEGMENT_VIOLATION,
4288					SIGSEGV)) {
4289				Signal signal(SIGSEGV,
4290					status == B_PERMISSION_DENIED
4291						? SEGV_ACCERR : SEGV_MAPERR,
4292					EFAULT, thread->team->id);
4293				signal.SetAddress((void*)address);
4294				send_signal_to_thread(thread, signal, 0);
4295			}
4296		}
4297	}
4298
4299	if (addressSpace != NULL)
4300		addressSpace->Put();
4301
4302	return B_HANDLED_INTERRUPT;
4303}
4304
4305
4306struct PageFaultContext {
4307	AddressSpaceReadLocker	addressSpaceLocker;
4308	VMCacheChainLocker		cacheChainLocker;
4309
4310	VMTranslationMap*		map;
4311	VMCache*				topCache;
4312	off_t					cacheOffset;
4313	vm_page_reservation		reservation;
4314	bool					isWrite;
4315
4316	// return values
4317	vm_page*				page;
4318	bool					restart;
4319	bool					pageAllocated;
4320
4321
4322	PageFaultContext(VMAddressSpace* addressSpace, bool isWrite)
4323		:
4324		addressSpaceLocker(addressSpace, true),
4325		map(addressSpace->TranslationMap()),
4326		isWrite(isWrite)
4327	{
4328	}
4329
4330	~PageFaultContext()
4331	{
4332		UnlockAll();
4333		vm_page_unreserve_pages(&reservation);
4334	}
4335
4336	void Prepare(VMCache* topCache, off_t cacheOffset)
4337	{
4338		this->topCache = topCache;
4339		this->cacheOffset = cacheOffset;
4340		page = NULL;
4341		restart = false;
4342		pageAllocated = false;
4343
4344		cacheChainLocker.SetTo(topCache);
4345	}
4346
4347	void UnlockAll(VMCache* exceptCache = NULL)
4348	{
4349		topCache = NULL;
4350		addressSpaceLocker.Unlock();
4351		cacheChainLocker.Unlock(exceptCache);
4352	}
4353};
4354
4355
4356/*!	Gets the page that should be mapped into the area.
4357	Returns an error code other than \c B_OK, if the page couldn't be found or
4358	paged in. The locking state of the address space and the caches is undefined
4359	in that case.
	Returns \c B_OK with \c context.restart set to \c true, if the function
	had to unlock the address space and all caches and is supposed to be
	called again.
4363	Returns \c B_OK with \c context.restart set to \c false, if the page was
4364	found. It is returned in \c context.page. The address space will still be
4365	locked as well as all caches starting from the top cache to at least the
4366	cache the page lives in.
4367*/
4368static status_t
4369fault_get_page(PageFaultContext& context)
4370{
4371	VMCache* cache = context.topCache;
4372	VMCache* lastCache = NULL;
4373	vm_page* page = NULL;
4374
4375	while (cache != NULL) {
4376		// We already hold the lock of the cache at this point.
4377
4378		lastCache = cache;
4379
4380		page = cache->LookupPage(context.cacheOffset);
4381		if (page != NULL && page->busy) {
			// the page is busy -- wait for it to become unbusy
4383			context.UnlockAll(cache);
4384			cache->ReleaseRefLocked();
4385			cache->WaitForPageEvents(page, PAGE_EVENT_NOT_BUSY, false);
4386
4387			// restart the whole process
4388			context.restart = true;
4389			return B_OK;
4390		}
4391
4392		if (page != NULL)
4393			break;
4394
4395		// The current cache does not contain the page we're looking for.
4396
4397		// see if the backing store has it
4398		if (cache->HasPage(context.cacheOffset)) {
4399			// insert a fresh page and mark it busy -- we're going to read it in
4400			page = vm_page_allocate_page(&context.reservation,
4401				PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_BUSY);
4402			cache->InsertPage(page, context.cacheOffset);
4403
4404			// We need to unlock all caches and the address space while reading
4405			// the page in. Keep a reference to the cache around.
4406			cache->AcquireRefLocked();
4407			context.UnlockAll();
4408
4409			// read the page in
4410			generic_io_vec vec;
4411			vec.base = (phys_addr_t)page->physical_page_number * B_PAGE_SIZE;
4412			generic_size_t bytesRead = vec.length = B_PAGE_SIZE;
4413
4414			status_t status = cache->Read(context.cacheOffset, &vec, 1,
4415				B_PHYSICAL_IO_REQUEST, &bytesRead);
4416
4417			cache->Lock();
4418
4419			if (status < B_OK) {
4420				// on error remove and free the page
4421				dprintf("reading page from cache %p returned: %s!\n",
4422					cache, strerror(status));
4423
4424				cache->NotifyPageEvents(page, PAGE_EVENT_NOT_BUSY);
4425				cache->RemovePage(page);
4426				vm_page_set_state(page, PAGE_STATE_FREE);
4427
4428				cache->ReleaseRefAndUnlock();
4429				return status;
4430			}
4431
4432			// mark the page unbusy again
4433			cache->MarkPageUnbusy(page);
4434
4435			DEBUG_PAGE_ACCESS_END(page);
4436
4437			// Since we needed to unlock everything temporarily, the area
4438			// situation might have changed. So we need to restart the whole
4439			// process.
4440			cache->ReleaseRefAndUnlock();
4441			context.restart = true;
4442			return B_OK;
4443		}
4444
4445		cache = context.cacheChainLocker.LockSourceCache();
4446	}
4447
4448	if (page == NULL) {
		// There was no adequate page; determine the cache for a clean one.
		// Read-only pages go into the deepest cache; only the topmost cache
		// may have direct write access.
4452		cache = context.isWrite ? context.topCache : lastCache;
4453
4454		// allocate a clean page
4455		page = vm_page_allocate_page(&context.reservation,
4456			PAGE_STATE_ACTIVE | VM_PAGE_ALLOC_CLEAR);
4457		FTRACE(("vm_soft_fault: just allocated page 0x%" B_PRIxPHYSADDR "\n",
4458			page->physical_page_number));
4459
4460		// insert the new page into our cache
4461		cache->InsertPage(page, context.cacheOffset);
4462		context.pageAllocated = true;
4463	} else if (page->Cache() != context.topCache && context.isWrite) {
4464		// We have a page that has the data we want, but in the wrong cache
4465		// object so we need to copy it and stick it into the top cache.
4466		vm_page* sourcePage = page;
4467
4468		// TODO: If memory is low, it might be a good idea to steal the page
4469		// from our source cache -- if possible, that is.
4470		FTRACE(("get new page, copy it, and put it into the topmost cache\n"));
4471		page = vm_page_allocate_page(&context.reservation, PAGE_STATE_ACTIVE);
4472
4473		// To not needlessly kill concurrency we unlock all caches but the top
4474		// one while copying the page. Lacking another mechanism to ensure that
4475		// the source page doesn't disappear, we mark it busy.
4476		sourcePage->busy = true;
4477		context.cacheChainLocker.UnlockKeepRefs(true);
4478
4479		// copy the page
4480		vm_memcpy_physical_page(page->physical_page_number * B_PAGE_SIZE,
4481			sourcePage->physical_page_number * B_PAGE_SIZE);
4482
4483		context.cacheChainLocker.RelockCaches(true);
4484		sourcePage->Cache()->MarkPageUnbusy(sourcePage);
4485
4486		// insert the new page into our cache
4487		context.topCache->InsertPage(page, context.cacheOffset);
4488		context.pageAllocated = true;
4489	} else
4490		DEBUG_PAGE_ACCESS_START(page);
4491
4492	context.page = page;
4493	return B_OK;
4494}
4495
4496
4497/*!	Makes sure the address in the given address space is mapped.
4498
4499	\param addressSpace The address space.
4500	\param originalAddress The address. Doesn't need to be page aligned.
	\param isWrite If \c true the address shall be write-accessible.
	\param isExecute If \c true the address shall be executable.
	\param isUser If \c true the access is requested by a userland team.
4503	\param wirePage On success, if non \c NULL, the wired count of the page
4504		mapped at the given address is incremented and the page is returned
4505		via this parameter.
4506	\return \c B_OK on success, another error code otherwise.
4507*/
4508static status_t
4509vm_soft_fault(VMAddressSpace* addressSpace, addr_t originalAddress,
4510	bool isWrite, bool isExecute, bool isUser, vm_page** wirePage)
4511{
4512	FTRACE(("vm_soft_fault: thid 0x%" B_PRIx32 " address 0x%" B_PRIxADDR ", "
4513		"isWrite %d, isUser %d\n", thread_get_current_thread_id(),
4514		originalAddress, isWrite, isUser));
4515
4516	PageFaultContext context(addressSpace, isWrite);
4517
4518	addr_t address = ROUNDDOWN(originalAddress, B_PAGE_SIZE);
4519	status_t status = B_OK;
4520
4521	addressSpace->IncrementFaultCount();
4522
4523	// We may need up to 2 pages plus pages needed for mapping them -- reserving
4524	// the pages upfront makes sure we don't have any cache locked, so that the
4525	// page daemon/thief can do their job without problems.
4526	size_t reservePages = 2 + context.map->MaxPagesNeededToMap(originalAddress,
4527		originalAddress);
4528	context.addressSpaceLocker.Unlock();
4529	vm_page_reserve_pages(&context.reservation, reservePages,
4530		addressSpace == VMAddressSpace::Kernel()
4531			? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER);
4532
4533	while (true) {
4534		context.addressSpaceLocker.Lock();
4535
4536		// get the area the fault was in
4537		VMArea* area = addressSpace->LookupArea(address);
4538		if (area == NULL) {
4539			dprintf("vm_soft_fault: va 0x%lx not covered by area in address "
4540				"space\n", originalAddress);
4541			TPF(PageFaultError(-1,
4542				VMPageFaultTracing::PAGE_FAULT_ERROR_NO_AREA));
4543			status = B_BAD_ADDRESS;
4544			break;
4545		}
4546
4547		// check permissions
4548		uint32 protection = get_area_page_protection(area, address);
4549		if (isUser && (protection & B_USER_PROTECTION) == 0) {
4550			dprintf("user access on kernel area 0x%" B_PRIx32 " at %p\n",
4551				area->id, (void*)originalAddress);
4552			TPF(PageFaultError(area->id,
4553				VMPageFaultTracing::PAGE_FAULT_ERROR_KERNEL_ONLY));
4554			status = B_PERMISSION_DENIED;
4555			break;
4556		}
4557		if (isWrite && (protection
4558				& (B_WRITE_AREA | (isUser ? 0 : B_KERNEL_WRITE_AREA))) == 0) {
4559			dprintf("write access attempted on write-protected area 0x%"
4560				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4561			TPF(PageFaultError(area->id,
4562				VMPageFaultTracing::PAGE_FAULT_ERROR_WRITE_PROTECTED));
4563			status = B_PERMISSION_DENIED;
4564			break;
4565		} else if (isExecute && (protection
4566				& (B_EXECUTE_AREA
4567					| (isUser ? 0 : B_KERNEL_EXECUTE_AREA))) == 0) {
4568			dprintf("instruction fetch attempted on execute-protected area 0x%"
4569				B_PRIx32 " at %p\n", area->id, (void*)originalAddress);
4570			TPF(PageFaultError(area->id,
4571				VMPageFaultTracing::PAGE_FAULT_ERROR_EXECUTE_PROTECTED));
4572			status = B_PERMISSION_DENIED;
4573			break;
4574		} else if (!isWrite && !isExecute && (protection
4575				& (B_READ_AREA | (isUser ? 0 : B_KERNEL_READ_AREA))) == 0) {
4576			dprintf("read access attempted on read-protected area 0x%" B_PRIx32
4577				" at %p\n", area->id, (void*)originalAddress);
4578			TPF(PageFaultError(area->id,
4579				VMPageFaultTracing::PAGE_FAULT_ERROR_READ_PROTECTED));
4580			status = B_PERMISSION_DENIED;
4581			break;
4582		}
4583
4584		// We have the area, it was a valid access, so let's try to resolve the
4585		// page fault now.
4586		// At first, the top most cache from the area is investigated.
4587
4588		context.Prepare(vm_area_get_locked_cache(area),
4589			address - area->Base() + area->cache_offset);
4590
4591		// See if this cache has a fault handler -- this will do all the work
4592		// for us.
4593		{
4594			// Note, since the page fault is resolved with interrupts enabled,
4595			// the fault handler could be called more than once for the same
4596			// reason -- the store must take this into account.
4597			status = context.topCache->Fault(addressSpace, context.cacheOffset);
4598			if (status != B_BAD_HANDLER)
4599				break;
4600		}
4601
4602		// The top most cache has no fault handler, so let's see if the cache or
4603		// its sources already have the page we're searching for (we're going
4604		// from top to bottom).
4605		status = fault_get_page(context);
4606		if (status != B_OK) {
4607			TPF(PageFaultError(area->id, status));
4608			break;
4609		}
4610
4611		if (context.restart)
4612			continue;
4613
4614		// All went fine, all there is left to do is to map the page into the
4615		// address space.
4616		TPF(PageFaultDone(area->id, context.topCache, context.page->Cache(),
4617			context.page));
4618
		// If the page doesn't reside in the area's cache, we need to make sure
		// it's mapped read-only, so that we cannot overwrite someone else's
		// data (copy-on-write).
4622		uint32 newProtection = protection;
4623		if (context.page->Cache() != context.topCache && !isWrite)
4624			newProtection &= ~(B_WRITE_AREA | B_KERNEL_WRITE_AREA);
4625
4626		bool unmapPage = false;
4627		bool mapPage = true;
4628
4629		// check whether there's already a page mapped at the address
4630		context.map->Lock();
4631
4632		phys_addr_t physicalAddress;
4633		uint32 flags;
4634		vm_page* mappedPage = NULL;
4635		if (context.map->Query(address, &physicalAddress, &flags) == B_OK
4636			&& (flags & PAGE_PRESENT) != 0
4637			&& (mappedPage = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
4638				!= NULL) {
4639			// Yep there's already a page. If it's ours, we can simply adjust
4640			// its protection. Otherwise we have to unmap it.
4641			if (mappedPage == context.page) {
4642				context.map->ProtectPage(area, address, newProtection);
4643					// Note: We assume that ProtectPage() is atomic (i.e.
4644					// the page isn't temporarily unmapped), otherwise we'd have
4645					// to make sure it isn't wired.
4646				mapPage = false;
4647			} else
4648				unmapPage = true;
4649		}
4650
4651		context.map->Unlock();
4652
4653		if (unmapPage) {
4654			// If the page is wired, we can't unmap it. Wait until it is unwired
4655			// again and restart. Note that the page cannot be wired for
			// writing, since it isn't in the topmost cache. So we can safely
4657			// ignore ranges wired for writing (our own and other concurrent
4658			// wiring attempts in progress) and in fact have to do that to avoid
4659			// a deadlock.
4660			VMAreaUnwiredWaiter waiter;
4661			if (area->AddWaiterIfWired(&waiter, address, B_PAGE_SIZE,
4662					VMArea::IGNORE_WRITE_WIRED_RANGES)) {
4663				// unlock everything and wait
4664				if (context.pageAllocated) {
4665					// ... but since we allocated a page and inserted it into
4666					// the top cache, remove and free it first. Otherwise we'd
4667					// have a page from a lower cache mapped while an upper
4668					// cache has a page that would shadow it.
4669					context.topCache->RemovePage(context.page);
4670					vm_page_free_etc(context.topCache, context.page,
4671						&context.reservation);
4672				} else
4673					DEBUG_PAGE_ACCESS_END(context.page);
4674
4675				context.UnlockAll();
4676				waiter.waitEntry.Wait();
4677				continue;
4678			}
4679
4680			// Note: The mapped page is a page of a lower cache. We are
			// guaranteed to have that cache locked, our new page is a copy of
4682			// that page, and the page is not busy. The logic for that guarantee
4683			// is as follows: Since the page is mapped, it must live in the top
4684			// cache (ruled out above) or any of its lower caches, and there is
4685			// (was before the new page was inserted) no other page in any
4686			// cache between the top cache and the page's cache (otherwise that
4687			// would be mapped instead). That in turn means that our algorithm
4688			// must have found it and therefore it cannot be busy either.
4689			DEBUG_PAGE_ACCESS_START(mappedPage);
4690			unmap_page(area, address);
4691			DEBUG_PAGE_ACCESS_END(mappedPage);
4692		}
4693
4694		if (mapPage) {
4695			if (map_page(area, context.page, address, newProtection,
4696					&context.reservation) != B_OK) {
				// Mapping can only fail when the page mapping object couldn't
				// be allocated. Save for the missing mapping, everything is
4699				// fine, though. If this was a regular page fault, we'll simply
4700				// leave and probably fault again. To make sure we'll have more
4701				// luck then, we ensure that the minimum object reserve is
4702				// available.
4703				DEBUG_PAGE_ACCESS_END(context.page);
4704
4705				context.UnlockAll();
4706
4707				if (object_cache_reserve(gPageMappingsObjectCache, 1, 0)
4708						!= B_OK) {
4709					// Apparently the situation is serious. Let's get ourselves
4710					// killed.
4711					status = B_NO_MEMORY;
4712				} else if (wirePage != NULL) {
4713					// The caller expects us to wire the page. Since
4714					// object_cache_reserve() succeeded, we should now be able
4715					// to allocate a mapping structure. Restart.
4716					continue;
4717				}
4718
4719				break;
4720			}
4721		} else if (context.page->State() == PAGE_STATE_INACTIVE)
4722			vm_page_set_state(context.page, PAGE_STATE_ACTIVE);
4723
4724		// also wire the page, if requested
4725		if (wirePage != NULL && status == B_OK) {
4726			increment_page_wired_count(context.page);
4727			*wirePage = context.page;
4728		}
4729
4730		DEBUG_PAGE_ACCESS_END(context.page);
4731
4732		break;
4733	}
4734
4735	return status;
4736}
4737
4738
4739status_t
4740vm_get_physical_page(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4741{
4742	return sPhysicalPageMapper->GetPage(paddr, _vaddr, _handle);
4743}
4744
4745status_t
4746vm_put_physical_page(addr_t vaddr, void* handle)
4747{
4748	return sPhysicalPageMapper->PutPage(vaddr, handle);
4749}
4750
4751
4752status_t
4753vm_get_physical_page_current_cpu(phys_addr_t paddr, addr_t* _vaddr,
4754	void** _handle)
4755{
4756	return sPhysicalPageMapper->GetPageCurrentCPU(paddr, _vaddr, _handle);
4757}
4758
4759status_t
4760vm_put_physical_page_current_cpu(addr_t vaddr, void* handle)
4761{
4762	return sPhysicalPageMapper->PutPageCurrentCPU(vaddr, handle);
4763}
4764
4765
4766status_t
4767vm_get_physical_page_debug(phys_addr_t paddr, addr_t* _vaddr, void** _handle)
4768{
4769	return sPhysicalPageMapper->GetPageDebug(paddr, _vaddr, _handle);
4770}
4771
4772status_t
4773vm_put_physical_page_debug(addr_t vaddr, void* handle)
4774{
4775	return sPhysicalPageMapper->PutPageDebug(vaddr, handle);
4776}
4777
4778
4779void
4780vm_get_info(system_info* info)
4781{
4782	swap_get_info(info);
4783
4784	MutexLocker locker(sAvailableMemoryLock);
4785	info->needed_memory = sNeededMemory;
4786	info->free_memory = sAvailableMemory;
4787}
4788
4789
4790uint32
4791vm_num_page_faults(void)
4792{
4793	return sPageFaults;
4794}
4795
4796
4797off_t
4798vm_available_memory(void)
4799{
4800	MutexLocker locker(sAvailableMemoryLock);
4801	return sAvailableMemory;
4802}
4803
4804
4805off_t
4806vm_available_not_needed_memory(void)
4807{
4808	MutexLocker locker(sAvailableMemoryLock);
4809	return sAvailableMemory - sNeededMemory;
4810}
4811
4812
4813/*!	Like vm_available_not_needed_memory(), but only for use in the kernel
4814	debugger.
4815*/
4816off_t
4817vm_available_not_needed_memory_debug(void)
4818{
4819	return sAvailableMemory - sNeededMemory;
4820}
4821
4822
4823size_t
4824vm_kernel_address_space_left(void)
4825{
4826	return VMAddressSpace::Kernel()->FreeSpace();
4827}
4828
4829
4830void
4831vm_unreserve_memory(size_t amount)
4832{
4833	mutex_lock(&sAvailableMemoryLock);
4834
4835	sAvailableMemory += amount;
4836
4837	mutex_unlock(&sAvailableMemoryLock);
4838}
4839
4840
4841status_t
4842vm_try_reserve_memory(size_t amount, int priority, bigtime_t timeout)
4843{
4844	size_t reserve = kMemoryReserveForPriority[priority];
4845
4846	MutexLocker locker(sAvailableMemoryLock);
4847
4848	//dprintf("try to reserve %lu bytes, %Lu left\n", amount, sAvailableMemory);
4849
4850	if (sAvailableMemory >= (off_t)(amount + reserve)) {
4851		sAvailableMemory -= amount;
4852		return B_OK;
4853	}
4854
4855	if (timeout <= 0)
4856		return B_NO_MEMORY;
4857
4858	// turn timeout into an absolute timeout
4859	timeout += system_time();
4860
4861	// loop until we've got the memory or the timeout occurs
4862	do {
4863		sNeededMemory += amount;
4864
4865		// call the low resource manager
4866		locker.Unlock();
4867		low_resource(B_KERNEL_RESOURCE_MEMORY, sNeededMemory - sAvailableMemory,
4868			B_ABSOLUTE_TIMEOUT, timeout);
4869		locker.Lock();
4870
4871		sNeededMemory -= amount;
4872
4873		if (sAvailableMemory >= (off_t)(amount + reserve)) {
4874			sAvailableMemory -= amount;
4875			return B_OK;
4876		}
4877	} while (timeout > system_time());
4878
4879	return B_NO_MEMORY;
4880}
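
/*	Usage sketch (illustrative only; the amount and timeout are hypothetical):
	a successful reservation must later be balanced by vm_unreserve_memory()
	with the same amount.

		if (vm_try_reserve_memory(16 * B_PAGE_SIZE, VM_PRIORITY_USER, 1000000)
				!= B_OK)
			return B_NO_MEMORY;
		// ... commit and use the memory ...
		vm_unreserve_memory(16 * B_PAGE_SIZE);
*/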
4881
4882
4883status_t
4884vm_set_area_memory_type(area_id id, phys_addr_t physicalBase, uint32 type)
4885{
4886	// NOTE: The caller is responsible for synchronizing calls to this function!
4887
4888	AddressSpaceReadLocker locker;
4889	VMArea* area;
4890	status_t status = locker.SetFromArea(id, area);
4891	if (status != B_OK)
4892		return status;
4893
4894	// nothing to do, if the type doesn't change
4895	uint32 oldType = area->MemoryType();
4896	if (type == oldType)
4897		return B_OK;
4898
4899	// set the memory type of the area and the mapped pages
4900	VMTranslationMap* map = area->address_space->TranslationMap();
4901	map->Lock();
4902	area->SetMemoryType(type);
4903	map->ProtectArea(area, area->protection);
4904	map->Unlock();
4905
4906	// set the physical memory type
4907	status_t error = arch_vm_set_memory_type(area, physicalBase, type);
4908	if (error != B_OK) {
4909		// reset the memory type of the area and the mapped pages
4910		map->Lock();
4911		area->SetMemoryType(oldType);
4912		map->ProtectArea(area, area->protection);
4913		map->Unlock();
4914		return error;
4915	}
4916
	return B_OK;
}
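
/*	Usage sketch (illustrative only): a driver might switch an area that maps a
	frame buffer to write-combining. This assumes the B_MTR_* memory type
	constants from OS.h; the area ID and physical base variables are
	hypothetical.

		status_t error = vm_set_area_memory_type(frameBufferArea,
			frameBufferPhysicalBase, B_MTR_WC);
		if (error != B_OK)
			dprintf("could not set memory type: %s\n", strerror(error));
*/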
4920
4921
4922/*!	This function enforces some protection properties:
4923	 - kernel areas must be W^X (after kernel startup)
4924	 - if B_WRITE_AREA is set, B_KERNEL_WRITE_AREA is set as well
4925	 - if only B_READ_AREA has been set, B_KERNEL_READ_AREA is also set
4926	 - if no protection is specified, it defaults to B_KERNEL_READ_AREA
4927	   and B_KERNEL_WRITE_AREA.
4928*/
4929static void
4930fix_protection(uint32* protection)
4931{
4932	if ((*protection & B_KERNEL_EXECUTE_AREA) != 0
4933		&& ((*protection & B_KERNEL_WRITE_AREA) != 0
4934			|| (*protection & B_WRITE_AREA) != 0)
4935		&& !gKernelStartup)
4936		panic("kernel areas cannot be both writable and executable!");
4937
4938	if ((*protection & B_KERNEL_PROTECTION) == 0) {
4939		if ((*protection & B_USER_PROTECTION) == 0
4940			|| (*protection & B_WRITE_AREA) != 0)
4941			*protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
4942		else
4943			*protection |= B_KERNEL_READ_AREA;
4944	}
4945}
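
/*	For example, fix_protection() turns a requested protection of just
	B_READ_AREA into B_READ_AREA | B_KERNEL_READ_AREA, a requested
	B_READ_AREA | B_WRITE_AREA into B_READ_AREA | B_WRITE_AREA
	| B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA, and a requested protection of 0
	into B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA.
*/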
4946
4947
4948static void
4949fill_area_info(struct VMArea* area, area_info* info, size_t size)
4950{
4951	strlcpy(info->name, area->name, B_OS_NAME_LENGTH);
4952	info->area = area->id;
4953	info->address = (void*)area->Base();
4954	info->size = area->Size();
4955	info->protection = area->protection;
4956	info->lock = B_FULL_LOCK;
4957	info->team = area->address_space->ID();
4958	info->copy_count = 0;
4959	info->in_count = 0;
4960	info->out_count = 0;
4961		// TODO: retrieve real values here!
4962
4963	VMCache* cache = vm_area_get_locked_cache(area);
4964
4965	// Note, this is a simplification; the cache could be larger than this area
4966	info->ram_size = cache->page_count * B_PAGE_SIZE;
4967
4968	vm_area_put_locked_cache(cache);
4969}
4970
4971
4972static status_t
4973vm_resize_area(area_id areaID, size_t newSize, bool kernel)
4974{
4975	// is newSize a multiple of B_PAGE_SIZE?
4976	if (newSize & (B_PAGE_SIZE - 1))
4977		return B_BAD_VALUE;
4978
4979	// lock all affected address spaces and the cache
4980	VMArea* area;
4981	VMCache* cache;
4982
4983	MultiAddressSpaceLocker locker;
4984	AreaCacheLocker cacheLocker;
4985
4986	status_t status;
4987	size_t oldSize;
4988	bool anyKernelArea;
4989	bool restart;
4990
4991	do {
4992		anyKernelArea = false;
4993		restart = false;
4994
4995		locker.Unset();
4996		status = locker.AddAreaCacheAndLock(areaID, true, true, area, &cache);
4997		if (status != B_OK)
4998			return status;
4999		cacheLocker.SetTo(cache, true);	// already locked
5000
5001		// enforce restrictions
5002		if (!kernel && area->address_space == VMAddressSpace::Kernel()) {
5003			dprintf("vm_resize_area: team %" B_PRId32 " tried to "
5004				"resize kernel area %" B_PRId32 " (%s)\n",
5005				team_get_current_team_id(), areaID, area->name);
5006			return B_NOT_ALLOWED;
5007		}
5008		// TODO: Enforce all restrictions (team, etc.)!
5009
5010		oldSize = area->Size();
5011		if (newSize == oldSize)
5012			return B_OK;
5013
5014		if (cache->type != CACHE_TYPE_RAM)
5015			return B_NOT_ALLOWED;
5016
5017		if (oldSize < newSize) {
5018			// We need to check if all areas of this cache can be resized.
5019			for (VMArea* current = cache->areas; current != NULL;
5020					current = current->cache_next) {
5021				if (!current->address_space->CanResizeArea(current, newSize))
5022					return B_ERROR;
5023				anyKernelArea
5024					|= current->address_space == VMAddressSpace::Kernel();
5025			}
5026		} else {
5027			// We're shrinking the areas, so we must make sure the affected
5028			// ranges are not wired.
5029			for (VMArea* current = cache->areas; current != NULL;
5030					current = current->cache_next) {
5031				anyKernelArea
5032					|= current->address_space == VMAddressSpace::Kernel();
5033
5034				if (wait_if_area_range_is_wired(current,
5035						current->Base() + newSize, oldSize - newSize, &locker,
5036						&cacheLocker)) {
5037					restart = true;
5038					break;
5039				}
5040			}
5041		}
5042	} while (restart);
5043
5044	// Okay, looks good so far, so let's do it
5045
5046	int priority = kernel && anyKernelArea
5047		? VM_PRIORITY_SYSTEM : VM_PRIORITY_USER;
5048	uint32 allocationFlags = kernel && anyKernelArea
5049		? HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE : 0;
5050
5051	if (oldSize < newSize) {
5052		// Growing the cache can fail, so we do it first.
5053		status = cache->Resize(cache->virtual_base + newSize, priority);
5054		if (status != B_OK)
5055			return status;
5056	}
5057
5058	for (VMArea* current = cache->areas; current != NULL;
5059			current = current->cache_next) {
5060		status = current->address_space->ResizeArea(current, newSize,
5061			allocationFlags);
5062		if (status != B_OK)
5063			break;
5064
5065		// We also need to unmap all pages beyond the new size, if the area has
5066		// shrunk
5067		if (newSize < oldSize) {
5068			VMCacheChainLocker cacheChainLocker(cache);
5069			cacheChainLocker.LockAllSourceCaches();
5070
5071			unmap_pages(current, current->Base() + newSize,
5072				oldSize - newSize);
5073
5074			cacheChainLocker.Unlock(cache);
5075		}
5076	}
5077
5078	if (status == B_OK) {
5079		// Shrink or grow individual page protections if in use.
5080		if (area->page_protections != NULL) {
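			// Individual page protections are stored as 4 bits per page, i.e.
			// two pages per byte, hence the (pageCount + 1) / 2 byte count
			// below.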
5081			uint32 bytes = (newSize / B_PAGE_SIZE + 1) / 2;
5082			uint8* newProtections
5083				= (uint8*)realloc(area->page_protections, bytes);
5084			if (newProtections == NULL)
5085				status = B_NO_MEMORY;
5086			else {
5087				area->page_protections = newProtections;
5088
5089				if (oldSize < newSize) {
5090					// init the additional page protections to that of the area
5091					uint32 offset = (oldSize / B_PAGE_SIZE + 1) / 2;
5092					uint32 areaProtection = area->protection
5093						& (B_READ_AREA | B_WRITE_AREA | B_EXECUTE_AREA);
5094					memset(area->page_protections + offset,
5095						areaProtection | (areaProtection << 4), bytes - offset);
5096					if ((oldSize / B_PAGE_SIZE) % 2 != 0) {
5097						uint8& entry = area->page_protections[offset - 1];
5098						entry = (entry & 0x0f) | (areaProtection << 4);
5099					}
5100				}
5101			}
5102		}
5103	}
5104
5105	// shrinking the cache can't fail, so we do it now
5106	if (status == B_OK && newSize < oldSize)
5107		status = cache->Resize(cache->virtual_base + newSize, priority);
5108
5109	if (status != B_OK) {
5110		// Something failed -- resize the areas back to their original size.
5111		// This can fail, too, in which case we're seriously screwed.
5112		for (VMArea* current = cache->areas; current != NULL;
5113				current = current->cache_next) {
5114			if (current->address_space->ResizeArea(current, oldSize,
5115					allocationFlags) != B_OK) {
				panic("vm_resize_area(): Failed to resize and could not "
					"restore the original state.");
5118			}
5119		}
5120
5121		cache->Resize(cache->virtual_base + oldSize, priority);
5122	}
5123
5124	// TODO: we must honour the lock restrictions of this area
5125	return status;
5126}
5127
5128
5129status_t
5130vm_memset_physical(phys_addr_t address, int value, phys_size_t length)
5131{
5132	return sPhysicalPageMapper->MemsetPhysical(address, value, length);
5133}
5134
5135
5136status_t
5137vm_memcpy_from_physical(void* to, phys_addr_t from, size_t length, bool user)
5138{
5139	return sPhysicalPageMapper->MemcpyFromPhysical(to, from, length, user);
5140}
5141
5142
5143status_t
5144vm_memcpy_to_physical(phys_addr_t to, const void* _from, size_t length,
5145	bool user)
5146{
5147	return sPhysicalPageMapper->MemcpyToPhysical(to, _from, length, user);
5148}
5149
5150
5151void
5152vm_memcpy_physical_page(phys_addr_t to, phys_addr_t from)
5153{
5154	return sPhysicalPageMapper->MemcpyPhysicalPage(to, from);
5155}
5156
5157
5158/*!	Copies a range of memory directly from/to a page that might not be mapped
5159	at the moment.
5160
	For \a unsafeMemory the current mapping (if any) is ignored. The function
5162	walks through the respective area's cache chain to find the physical page
5163	and copies from/to it directly.
5164	The memory range starting at \a unsafeMemory with a length of \a size bytes
5165	must not cross a page boundary.
5166
5167	\param teamID The team ID identifying the address space \a unsafeMemory is
5168		to be interpreted in. Ignored, if \a unsafeMemory is a kernel address
5169		(the kernel address space is assumed in this case). If \c B_CURRENT_TEAM
5170		is passed, the address space of the thread returned by
5171		debug_get_debugged_thread() is used.
5172	\param unsafeMemory The start of the unsafe memory range to be copied
5173		from/to.
5174	\param buffer A safely accessible kernel buffer to be copied from/to.
5175	\param size The number of bytes to be copied.
5176	\param copyToUnsafe If \c true, memory is copied from \a buffer to
5177		\a unsafeMemory, the other way around otherwise.
5178*/
5179status_t
5180vm_debug_copy_page_memory(team_id teamID, void* unsafeMemory, void* buffer,
5181	size_t size, bool copyToUnsafe)
5182{
5183	if (size > B_PAGE_SIZE || ROUNDDOWN((addr_t)unsafeMemory, B_PAGE_SIZE)
5184			!= ROUNDDOWN((addr_t)unsafeMemory + size - 1, B_PAGE_SIZE)) {
5185		return B_BAD_VALUE;
5186	}
5187
5188	// get the address space for the debugged thread
5189	VMAddressSpace* addressSpace;
5190	if (IS_KERNEL_ADDRESS(unsafeMemory)) {
5191		addressSpace = VMAddressSpace::Kernel();
5192	} else if (teamID == B_CURRENT_TEAM) {
5193		Thread* thread = debug_get_debugged_thread();
5194		if (thread == NULL || thread->team == NULL)
5195			return B_BAD_ADDRESS;
5196
5197		addressSpace = thread->team->address_space;
5198	} else
5199		addressSpace = VMAddressSpace::DebugGet(teamID);
5200
5201	if (addressSpace == NULL)
5202		return B_BAD_ADDRESS;
5203
5204	// get the area
5205	VMArea* area = addressSpace->LookupArea((addr_t)unsafeMemory);
5206	if (area == NULL)
5207		return B_BAD_ADDRESS;
5208
5209	// search the page
5210	off_t cacheOffset = (addr_t)unsafeMemory - area->Base()
5211		+ area->cache_offset;
5212	VMCache* cache = area->cache;
5213	vm_page* page = NULL;
5214	while (cache != NULL) {
5215		page = cache->DebugLookupPage(cacheOffset);
5216		if (page != NULL)
5217			break;
5218
5219		// Page not found in this cache -- if it is paged out, we must not try
5220		// to get it from lower caches.
5221		if (cache->DebugHasPage(cacheOffset))
5222			break;
5223
5224		cache = cache->source;
5225	}
5226
5227	if (page == NULL)
5228		return B_UNSUPPORTED;
5229
5230	// copy from/to physical memory
5231	phys_addr_t physicalAddress = page->physical_page_number * B_PAGE_SIZE
5232		+ (addr_t)unsafeMemory % B_PAGE_SIZE;
5233
5234	if (copyToUnsafe) {
5235		if (page->Cache() != area->cache)
5236			return B_UNSUPPORTED;
5237
5238		return vm_memcpy_to_physical(physicalAddress, buffer, size, false);
5239	}
5240
5241	return vm_memcpy_from_physical(buffer, physicalAddress, size, false);
5242}
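
/*	Usage sketch (illustrative only; the address variable is hypothetical): a
	kernel debugger command could read a 32 bit value from the debugged
	thread's user memory without acquiring any locks:

		uint32 value;
		if (vm_debug_copy_page_memory(B_CURRENT_TEAM, userAddress, &value,
				sizeof(value), false) == B_OK)
			kprintf("value: %#" B_PRIx32 "\n", value);
*/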
5243
5244
5245//	#pragma mark - kernel public API
5246
5247
5248status_t
5249user_memcpy(void* to, const void* from, size_t size)
5250{
5251	// don't allow address overflows
5252	if ((addr_t)from + size < (addr_t)from || (addr_t)to + size < (addr_t)to)
5253		return B_BAD_ADDRESS;
5254
5255	if (arch_cpu_user_memcpy(to, from, size) < B_OK)
5256		return B_BAD_ADDRESS;
5257
5258	return B_OK;
5259}
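
/*	Usage sketch (illustrative only; the syscall parameter and structure names
	are hypothetical): syscalls typically validate the user pointer first and
	treat any copy failure as B_BAD_ADDRESS.

		some_info info;
		if (userInfo == NULL || !IS_USER_ADDRESS(userInfo)
			|| user_memcpy(&info, userInfo, sizeof(info)) != B_OK)
			return B_BAD_ADDRESS;
*/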
5260
5261
5262/*!	\brief Copies at most (\a size - 1) characters from the string in \a from to
5263	the string in \a to, NULL-terminating the result.
5264
5265	\param to Pointer to the destination C-string.
5266	\param from Pointer to the source C-string.
5267	\param size Size in bytes of the string buffer pointed to by \a to.
5268
	\return strlen(\a from), or an error code (e.g. \c B_BAD_ADDRESS), if an
		error occurred.
5270*/
5271ssize_t
5272user_strlcpy(char* to, const char* from, size_t size)
5273{
5274	if (to == NULL && size != 0)
5275		return B_BAD_VALUE;
5276	if (from == NULL)
5277		return B_BAD_ADDRESS;
5278
5279	// limit size to avoid address overflows
5280	size_t maxSize = std::min((addr_t)size,
5281		~(addr_t)0 - std::max((addr_t)from, (addr_t)to) + 1);
5282		// NOTE: Since arch_cpu_user_strlcpy() determines the length of \a from,
5283		// the source address might still overflow.
5284
5285	ssize_t result = arch_cpu_user_strlcpy(to, from, maxSize);
5286
5287	// If we hit the address overflow boundary, fail.
5288	if (result < 0 || (result >= 0 && (size_t)result >= maxSize
5289			&& maxSize < size)) {
5290		return B_BAD_ADDRESS;
5291	}
5292
5293	return result;
5294}
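
/*	Usage sketch (illustrative only; the buffer and source names are
	hypothetical): copying a path from userland, rejecting both faults and
	overlong strings.

		char path[B_PATH_NAME_LENGTH];
		ssize_t length = user_strlcpy(path, userPath, sizeof(path));
		if (length < 0)
			return length;
		if ((size_t)length >= sizeof(path))
			return B_NAME_TOO_LONG;
*/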
5295
5296
5297status_t
5298user_memset(void* s, char c, size_t count)
5299{
5300	// don't allow address overflows
5301	if ((addr_t)s + count < (addr_t)s)
5302		return B_BAD_ADDRESS;
5303	if (arch_cpu_user_memset(s, c, count) < B_OK)
5304		return B_BAD_ADDRESS;
5305
5306	return B_OK;
5307}
5308
5309
5310/*!	Wires a single page at the given address.
5311
5312	\param team The team whose address space the address belongs to. Supports
5313		also \c B_CURRENT_TEAM. If the given address is a kernel address, the
5314		parameter is ignored.
	\param address The virtual address to wire down. Does not need to be page
		aligned.
5317	\param writable If \c true the page shall be writable.
5318	\param info On success the info is filled in, among other things
5319		containing the physical address the given virtual one translates to.
5320	\return \c B_OK, when the page could be wired, another error code otherwise.
5321*/
5322status_t
5323vm_wire_page(team_id team, addr_t address, bool writable,
5324	VMPageWiringInfo* info)
5325{
5326	addr_t pageAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5327	info->range.SetTo(pageAddress, B_PAGE_SIZE, writable, false);
5328
5329	// compute the page protection that is required
5330	bool isUser = IS_USER_ADDRESS(address);
5331	uint32 requiredProtection = PAGE_PRESENT
5332		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5333	if (writable)
5334		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5335
5336	// get and read lock the address space
5337	VMAddressSpace* addressSpace = NULL;
5338	if (isUser) {
5339		if (team == B_CURRENT_TEAM)
5340			addressSpace = VMAddressSpace::GetCurrent();
5341		else
5342			addressSpace = VMAddressSpace::Get(team);
5343	} else
5344		addressSpace = VMAddressSpace::GetKernel();
5345	if (addressSpace == NULL)
5346		return B_ERROR;
5347
5348	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5349
5350	VMTranslationMap* map = addressSpace->TranslationMap();
5351	status_t error = B_OK;
5352
5353	// get the area
5354	VMArea* area = addressSpace->LookupArea(pageAddress);
5355	if (area == NULL) {
5356		addressSpace->Put();
5357		return B_BAD_ADDRESS;
5358	}
5359
5360	// Lock the area's top cache. This is a requirement for VMArea::Wire().
5361	VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5362
5363	// mark the area range wired
5364	area->Wire(&info->range);
5365
5366	// Lock the area's cache chain and the translation map. Needed to look
5367	// up the page and play with its wired count.
5368	cacheChainLocker.LockAllSourceCaches();
5369	map->Lock();
5370
5371	phys_addr_t physicalAddress;
5372	uint32 flags;
5373	vm_page* page;
5374	if (map->Query(pageAddress, &physicalAddress, &flags) == B_OK
5375		&& (flags & requiredProtection) == requiredProtection
5376		&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5377			!= NULL) {
5378		// Already mapped with the correct permissions -- just increment
5379		// the page's wired count.
5380		increment_page_wired_count(page);
5381
5382		map->Unlock();
5383		cacheChainLocker.Unlock();
5384		addressSpaceLocker.Unlock();
5385	} else {
5386		// Let vm_soft_fault() map the page for us, if possible. We need
5387		// to fully unlock to avoid deadlocks. Since we have already
5388		// wired the area itself, nothing disturbing will happen with it
5389		// in the meantime.
5390		map->Unlock();
5391		cacheChainLocker.Unlock();
5392		addressSpaceLocker.Unlock();
5393
5394		error = vm_soft_fault(addressSpace, pageAddress, writable, false,
5395			isUser, &page);
5396
5397		if (error != B_OK) {
5398			// The page could not be mapped -- clean up.
5399			VMCache* cache = vm_area_get_locked_cache(area);
5400			area->Unwire(&info->range);
5401			cache->ReleaseRefAndUnlock();
5402			addressSpace->Put();
5403			return error;
5404		}
5405	}
5406
5407	info->physicalAddress
5408		= (phys_addr_t)page->physical_page_number * B_PAGE_SIZE
5409			+ address % B_PAGE_SIZE;
5410	info->page = page;
5411
5412	return B_OK;
5413}
5414
5415
5416/*!	Unwires a single page previously wired via vm_wire_page().
5417
5418	\param info The same object passed to vm_wire_page() before.
5419*/
5420void
5421vm_unwire_page(VMPageWiringInfo* info)
5422{
5423	// lock the address space
5424	VMArea* area = info->range.area;
5425	AddressSpaceReadLocker addressSpaceLocker(area->address_space, false);
5426		// takes over our reference
5427
5428	// lock the top cache
5429	VMCache* cache = vm_area_get_locked_cache(area);
5430	VMCacheChainLocker cacheChainLocker(cache);
5431
5432	if (info->page->Cache() != cache) {
5433		// The page is not in the top cache, so we lock the whole cache chain
5434		// before touching the page's wired count.
5435		cacheChainLocker.LockAllSourceCaches();
5436	}
5437
5438	decrement_page_wired_count(info->page);
5439
5440	// remove the wired range from the area
5441	area->Unwire(&info->range);
5442
5443	cacheChainLocker.Unlock();
5444}
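
/*	Illustrative usage sketch (not part of the original source): wiring a
	single user page to learn its physical address, e.g. for a short DMA
	transfer, and balancing the call with vm_unwire_page(). The userBuffer
	pointer and doDeviceTransfer() are hypothetical.

		VMPageWiringInfo info;
		status_t status = vm_wire_page(B_CURRENT_TEAM, (addr_t)userBuffer,
			true, &info);
		if (status != B_OK)
			return status;

		// info.physicalAddress stays valid until the page is unwired
		doDeviceTransfer(info.physicalAddress);

		vm_unwire_page(&info);
*/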
5445
5446
5447/*!	Wires down the given address range in the specified team's address space.
5448
5449	If successful, the function
5450	- acquires a reference to the specified team's address space,
5451	- adds respective wired ranges to all areas that intersect with the given
5452	  address range,
5453	- makes sure all pages in the given address range are mapped with the
5454	  requested access permissions and increments their wired count.
5455
5456	It fails when \a team doesn't specify a valid address space, when any part
5457	of the specified address range is not covered by areas, when the concerned
5458	areas don't allow mapping with the requested permissions, or when mapping
5459	failed for another reason.
5460
5461	When successful, the call must be balanced by an unlock_memory_etc() call
5462	with the exact same parameters.
5463
5464	\param team Identifies the address space (via team ID). \c B_CURRENT_TEAM is
5465		supported.
5466	\param address The start of the address range to be wired.
5467	\param numBytes The size of the address range to be wired.
5468	\param flags Flags. Currently only \c B_READ_DEVICE is defined, which
5469		requests that the range must be wired writable ("read from device
5470		into memory").
5471	\return \c B_OK on success, another error code otherwise.
5472*/
5473status_t
5474lock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5475{
5476	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5477	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5478
5479	// compute the page protection that is required
5480	bool isUser = IS_USER_ADDRESS(address);
5481	bool writable = (flags & B_READ_DEVICE) == 0;
5482	uint32 requiredProtection = PAGE_PRESENT
5483		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5484	if (writable)
5485		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5486
5487	uint32 mallocFlags = isUser
5488		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5489
5490	// get and read lock the address space
5491	VMAddressSpace* addressSpace = NULL;
5492	if (isUser) {
5493		if (team == B_CURRENT_TEAM)
5494			addressSpace = VMAddressSpace::GetCurrent();
5495		else
5496			addressSpace = VMAddressSpace::Get(team);
5497	} else
5498		addressSpace = VMAddressSpace::GetKernel();
5499	if (addressSpace == NULL)
5500		return B_ERROR;
5501
5502	AddressSpaceReadLocker addressSpaceLocker(addressSpace, true);
5503		// We get a new address space reference here. The one we got above will
5504		// be freed by unlock_memory_etc().
5505
5506	VMTranslationMap* map = addressSpace->TranslationMap();
5507	status_t error = B_OK;
5508
5509	// iterate through all concerned areas
5510	addr_t nextAddress = lockBaseAddress;
5511	while (nextAddress != lockEndAddress) {
5512		// get the next area
5513		VMArea* area = addressSpace->LookupArea(nextAddress);
5514		if (area == NULL) {
5515			error = B_BAD_ADDRESS;
5516			break;
5517		}
5518
5519		addr_t areaStart = nextAddress;
5520		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5521
5522		// allocate the wired range (do that before locking the cache to avoid
5523		// deadlocks)
5524		VMAreaWiredRange* range = new(malloc_flags(mallocFlags))
5525			VMAreaWiredRange(areaStart, areaEnd - areaStart, writable, true);
5526		if (range == NULL) {
5527			error = B_NO_MEMORY;
5528			break;
5529		}
5530
5531		// Lock the area's top cache. This is a requirement for VMArea::Wire().
5532		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5533
5534		// mark the area range wired
5535		area->Wire(range);
5536
5537		// Depending on the area cache type and the wiring, we may not need to
5538		// look at the individual pages.
5539		if (area->cache_type == CACHE_TYPE_NULL
5540			|| area->cache_type == CACHE_TYPE_DEVICE
5541			|| area->wiring == B_FULL_LOCK
5542			|| area->wiring == B_CONTIGUOUS) {
5543			nextAddress = areaEnd;
5544			continue;
5545		}
5546
5547		// Lock the area's cache chain and the translation map. Needed to look
5548		// up pages and play with their wired count.
5549		cacheChainLocker.LockAllSourceCaches();
5550		map->Lock();
5551
5552		// iterate through the pages and wire them
5553		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5554			phys_addr_t physicalAddress;
5555			uint32 flags;
5556
5557			vm_page* page;
5558			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5559				&& (flags & requiredProtection) == requiredProtection
5560				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5561					!= NULL) {
5562				// Already mapped with the correct permissions -- just increment
5563				// the page's wired count.
5564				increment_page_wired_count(page);
5565			} else {
5566				// Let vm_soft_fault() map the page for us, if possible. We need
5567				// to fully unlock to avoid deadlocks. Since we have already
5568				// wired the area itself, nothing disturbing will happen with it
5569				// in the meantime.
5570				map->Unlock();
5571				cacheChainLocker.Unlock();
5572				addressSpaceLocker.Unlock();
5573
5574				error = vm_soft_fault(addressSpace, nextAddress, writable,
5575					false, isUser, &page);
5576
5577				addressSpaceLocker.Lock();
5578				cacheChainLocker.SetTo(vm_area_get_locked_cache(area));
5579				cacheChainLocker.LockAllSourceCaches();
5580				map->Lock();
5581			}
5582
5583			if (error != B_OK)
5584				break;
5585		}
5586
5587		map->Unlock();
5588
5589		if (error == B_OK) {
5590			cacheChainLocker.Unlock();
5591		} else {
5592			// An error occurred, so abort right here. If the current address
5593			// is the first in this area, unwire the area, since we won't get
5594			// to it when reverting what we've done so far.
5595			if (nextAddress == areaStart) {
5596				area->Unwire(range);
5597				cacheChainLocker.Unlock();
5598				range->~VMAreaWiredRange();
5599				free_etc(range, mallocFlags);
5600			} else
5601				cacheChainLocker.Unlock();
5602
5603			break;
5604		}
5605	}
5606
5607	if (error != B_OK) {
5608		// An error occurred, so unwire all that we've already wired. Note that
5609		// even if not a single page was wired, unlock_memory_etc() is called
5610		// to put the address space reference.
5611		addressSpaceLocker.Unlock();
5612		unlock_memory_etc(team, (void*)lockBaseAddress,
5613			nextAddress - lockBaseAddress, flags);
5614	}
5615
5616	return error;
5617}
5618
5619
5620status_t
5621lock_memory(void* address, size_t numBytes, uint32 flags)
5622{
5623	return lock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5624}
5625
5626
5627/*!	Unwires an address range previously wired with lock_memory_etc().
5628
5629	Note that a call to this function must balance a previous lock_memory_etc()
5630	call with exactly the same parameters.
5631*/
5632status_t
5633unlock_memory_etc(team_id team, void* address, size_t numBytes, uint32 flags)
5634{
5635	addr_t lockBaseAddress = ROUNDDOWN((addr_t)address, B_PAGE_SIZE);
5636	addr_t lockEndAddress = ROUNDUP((addr_t)address + numBytes, B_PAGE_SIZE);
5637
5638	// compute the page protection that is required
5639	bool isUser = IS_USER_ADDRESS(address);
5640	bool writable = (flags & B_READ_DEVICE) == 0;
5641	uint32 requiredProtection = PAGE_PRESENT
5642		| B_KERNEL_READ_AREA | (isUser ? B_READ_AREA : 0);
5643	if (writable)
5644		requiredProtection |= B_KERNEL_WRITE_AREA | (isUser ? B_WRITE_AREA : 0);
5645
5646	uint32 mallocFlags = isUser
5647		? 0 : HEAP_DONT_WAIT_FOR_MEMORY | HEAP_DONT_LOCK_KERNEL_SPACE;
5648
5649	// get and read lock the address space
5650	VMAddressSpace* addressSpace = NULL;
5651	if (isUser) {
5652		if (team == B_CURRENT_TEAM)
5653			addressSpace = VMAddressSpace::GetCurrent();
5654		else
5655			addressSpace = VMAddressSpace::Get(team);
5656	} else
5657		addressSpace = VMAddressSpace::GetKernel();
5658	if (addressSpace == NULL)
5659		return B_ERROR;
5660
5661	AddressSpaceReadLocker addressSpaceLocker(addressSpace, false);
5662		// Take over the address space reference. We don't unlock until we're
5663		// done.
5664
5665	VMTranslationMap* map = addressSpace->TranslationMap();
5666	status_t error = B_OK;
5667
5668	// iterate through all concerned areas
5669	addr_t nextAddress = lockBaseAddress;
5670	while (nextAddress != lockEndAddress) {
5671		// get the next area
5672		VMArea* area = addressSpace->LookupArea(nextAddress);
5673		if (area == NULL) {
5674			error = B_BAD_ADDRESS;
5675			break;
5676		}
5677
5678		addr_t areaStart = nextAddress;
5679		addr_t areaEnd = std::min(lockEndAddress, area->Base() + area->Size());
5680
5681		// Lock the area's top cache. This is a requirement for
5682		// VMArea::Unwire().
5683		VMCacheChainLocker cacheChainLocker(vm_area_get_locked_cache(area));
5684
5685		// Depending on the area cache type and the wiring, we may not need to
5686		// look at the individual pages.
5687		if (area->cache_type == CACHE_TYPE_NULL
5688			|| area->cache_type == CACHE_TYPE_DEVICE
5689			|| area->wiring == B_FULL_LOCK
5690			|| area->wiring == B_CONTIGUOUS) {
5691			// unwire the range (to avoid deadlocks we delete the range after
5692			// unlocking the cache)
5693			nextAddress = areaEnd;
5694			VMAreaWiredRange* range = area->Unwire(areaStart,
5695				areaEnd - areaStart, writable);
5696			cacheChainLocker.Unlock();
5697			if (range != NULL) {
5698				range->~VMAreaWiredRange();
5699				free_etc(range, mallocFlags);
5700			}
5701			continue;
5702		}
5703
5704		// Lock the area's cache chain and the translation map. Needed to look
5705		// up pages and play with their wired count.
5706		cacheChainLocker.LockAllSourceCaches();
5707		map->Lock();
5708
5709		// iterate through the pages and unwire them
5710		for (; nextAddress != areaEnd; nextAddress += B_PAGE_SIZE) {
5711			phys_addr_t physicalAddress;
5712			uint32 flags;
5713
5714			vm_page* page;
5715			if (map->Query(nextAddress, &physicalAddress, &flags) == B_OK
5716				&& (flags & PAGE_PRESENT) != 0
5717				&& (page = vm_lookup_page(physicalAddress / B_PAGE_SIZE))
5718					!= NULL) {
5719				// The page is still mapped (PAGE_PRESENT is set) -- just
5720				// decrement the page's wired count.
5721				decrement_page_wired_count(page);
5722			} else {
5723				panic("unlock_memory_etc(): Failed to unwire page: address "
5724					"space %p, address: %#" B_PRIxADDR, addressSpace,
5725					nextAddress);
5726				error = B_BAD_VALUE;
5727				break;
5728			}
5729		}
5730
5731		map->Unlock();
5732
5733		// All pages are unwired. Remove the area's wired range as well (to
5734		// avoid deadlocks we delete the range after unlocking the cache).
5735		VMAreaWiredRange* range = area->Unwire(areaStart,
5736			areaEnd - areaStart, writable);
5737
5738		cacheChainLocker.Unlock();
5739
5740		if (range != NULL) {
5741			range->~VMAreaWiredRange();
5742			free_etc(range, mallocFlags);
5743		}
5744
5745		if (error != B_OK)
5746			break;
5747	}
5748
5749	// get rid of the address space reference lock_memory_etc() acquired
5750	addressSpace->Put();
5751
5752	return error;
5753}
5754
5755
5756status_t
5757unlock_memory(void* address, size_t numBytes, uint32 flags)
5758{
5759	return unlock_memory_etc(B_CURRENT_TEAM, address, numBytes, flags);
5760}
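
/*	Illustrative usage sketch (not part of the original source): wiring a user
	I/O buffer for the duration of a device transfer. As documented above, the
	unlock_memory_etc() call must use exactly the same parameters as the
	matching lock_memory_etc() call. The buffer and length variables are
	hypothetical.

		status_t status = lock_memory_etc(B_CURRENT_TEAM, buffer, length,
			B_READ_DEVICE);
		if (status != B_OK)
			return status;

		// ... transfer from the device into the wired buffer ...

		unlock_memory_etc(B_CURRENT_TEAM, buffer, length, B_READ_DEVICE);
*/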
5761
5762
5763/*!	Similar to get_memory_map(), but also allows specifying the address space
5764	for the memory in question and has saner semantics.
5765	Returns \c B_OK when the complete range could be translated, or
5766	\c B_BUFFER_OVERFLOW if the provided array wasn't big enough. In either
5767	case the actual number of entries is written to \c *_numEntries. Any other
5768	error case indicates complete failure; \c *_numEntries will be set to \c 0
5769	in this case.
5770*/
5771status_t
5772get_memory_map_etc(team_id team, const void* address, size_t numBytes,
5773	physical_entry* table, uint32* _numEntries)
5774{
5775	uint32 numEntries = *_numEntries;
5776	*_numEntries = 0;
5777
5778	VMAddressSpace* addressSpace;
5779	addr_t virtualAddress = (addr_t)address;
5780	addr_t pageOffset = virtualAddress & (B_PAGE_SIZE - 1);
5781	phys_addr_t physicalAddress;
5782	status_t status = B_OK;
5783	int32 index = -1;
5784	addr_t offset = 0;
5785	bool interrupts = are_interrupts_enabled();
5786
5787	TRACE(("get_memory_map_etc(%" B_PRId32 ", %p, %lu bytes, %" B_PRIu32 " "
5788		"entries)\n", team, address, numBytes, numEntries));
5789
5790	if (numEntries == 0 || numBytes == 0)
5791		return B_BAD_VALUE;
5792
5793	// in which address space is the address to be found?
5794	if (IS_USER_ADDRESS(virtualAddress)) {
5795		if (team == B_CURRENT_TEAM)
5796			addressSpace = VMAddressSpace::GetCurrent();
5797		else
5798			addressSpace = VMAddressSpace::Get(team);
5799	} else
5800		addressSpace = VMAddressSpace::GetKernel();
5801
5802	if (addressSpace == NULL)
5803		return B_ERROR;
5804
5805	VMTranslationMap* map = addressSpace->TranslationMap();
5806
5807	if (interrupts)
5808		map->Lock();
5809
5810	while (offset < numBytes) {
5811		addr_t bytes = min_c(numBytes - offset, B_PAGE_SIZE);
5812		uint32 flags;
5813
5814		if (interrupts) {
5815			status = map->Query((addr_t)address + offset, &physicalAddress,
5816				&flags);
5817		} else {
5818			status = map->QueryInterrupt((addr_t)address + offset,
5819				&physicalAddress, &flags);
5820		}
5821		if (status < B_OK)
5822			break;
5823		if ((flags & PAGE_PRESENT) == 0) {
5824			panic("get_memory_map() called on unmapped memory!");
5825			return B_BAD_ADDRESS;
5826		}
5827
5828		if (index < 0 && pageOffset > 0) {
5829			physicalAddress += pageOffset;
5830			if (bytes > B_PAGE_SIZE - pageOffset)
5831				bytes = B_PAGE_SIZE - pageOffset;
5832		}
5833
5834		// need to switch to the next physical_entry?
5835		if (index < 0 || table[index].address
5836				!= physicalAddress - table[index].size) {
5837			if ((uint32)++index + 1 > numEntries) {
5838				// table too small
5839				break;
5840			}
5841			table[index].address = physicalAddress;
5842			table[index].size = bytes;
5843		} else {
5844			// the page fits into the current entry
5845			table[index].size += bytes;
5846		}
5847
5848		offset += bytes;
5849	}
5850
5851	if (interrupts)
5852		map->Unlock();
5853
5854	if (status != B_OK)
5855		return status;
5856
5857	if ((uint32)index + 1 > numEntries) {
5858		*_numEntries = index;
5859		return B_BUFFER_OVERFLOW;
5860	}
5861
5862	*_numEntries = index + 1;
5863	return B_OK;
5864}
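
/*	Illustrative usage sketch (not part of the original source): translating a
	(typically locked/wired) buffer into a scatter/gather list. The table size
	of 8 entries and the buffer and length variables are assumptions for the
	example only.

		physical_entry table[8];
		uint32 numEntries = 8;
		status_t status = get_memory_map_etc(B_CURRENT_TEAM, buffer, length,
			table, &numEntries);
		if (status != B_OK && status != B_BUFFER_OVERFLOW)
			return status;
		// on B_BUFFER_OVERFLOW the table was too small; numEntries holds the
		// number of entries that were actually filled in
*/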
5865
5866
5867/*!	According to the BeBook, this function should always succeed.
5868	This is no longer the case.
5869*/
5870extern "C" int32
5871__get_memory_map_haiku(const void* address, size_t numBytes,
5872	physical_entry* table, int32 numEntries)
5873{
5874	uint32 entriesRead = numEntries;
5875	status_t error = get_memory_map_etc(B_CURRENT_TEAM, address, numBytes,
5876		table, &entriesRead);
5877	if (error != B_OK)
5878		return error;
5879
5880	// close the entry list
5881
5882	// if it's only one entry, we will silently accept the missing ending
5883	if (numEntries == 1)
5884		return B_OK;
5885
5886	if (entriesRead + 1 > (uint32)numEntries)
5887		return B_BUFFER_OVERFLOW;
5888
5889	table[entriesRead].address = 0;
5890	table[entriesRead].size = 0;
5891
5892	return B_OK;
5893}
5894
5895
5896area_id
5897area_for(void* address)
5898{
5899	return vm_area_for((addr_t)address, true);
5900}
5901
5902
5903area_id
5904find_area(const char* name)
5905{
5906	return VMAreaHash::Find(name);
5907}
5908
5909
5910status_t
5911_get_area_info(area_id id, area_info* info, size_t size)
5912{
5913	if (size != sizeof(area_info) || info == NULL)
5914		return B_BAD_VALUE;
5915
5916	AddressSpaceReadLocker locker;
5917	VMArea* area;
5918	status_t status = locker.SetFromArea(id, area);
5919	if (status != B_OK)
5920		return status;
5921
5922	fill_area_info(area, info, size);
5923	return B_OK;
5924}
5925
5926
5927status_t
5928_get_next_area_info(team_id team, ssize_t* cookie, area_info* info, size_t size)
5929{
5930	addr_t nextBase = *(addr_t*)cookie;
5931
5932	// we're already through the list
5933	if (nextBase == (addr_t)-1)
5934		return B_ENTRY_NOT_FOUND;
5935
5936	if (team == B_CURRENT_TEAM)
5937		team = team_get_current_team_id();
5938
5939	AddressSpaceReadLocker locker(team);
5940	if (!locker.IsLocked())
5941		return B_BAD_TEAM_ID;
5942
5943	VMArea* area;
5944	for (VMAddressSpace::AreaIterator it
5945				= locker.AddressSpace()->GetAreaIterator();
5946			(area = it.Next()) != NULL;) {
5947		if (area->Base() > nextBase)
5948			break;
5949	}
5950
5951	if (area == NULL) {
5952		*(addr_t*)cookie = (addr_t)-1;
5953		return B_ENTRY_NOT_FOUND;
5954	}
5955
5956	fill_area_info(area, info, size);
5957	*cookie = (ssize_t)(area->Base());
5958
5959	return B_OK;
5960}
5961
5962
5963status_t
5964set_area_protection(area_id area, uint32 newProtection)
5965{
5966	return vm_set_area_protection(VMAddressSpace::KernelID(), area,
5967		newProtection, true);
5968}
5969
5970
5971status_t
5972resize_area(area_id areaID, size_t newSize)
5973{
5974	return vm_resize_area(areaID, newSize, true);
5975}
5976
5977
5978/*!	Transfers the specified area to a new team. The caller must be the owner
5979	of the area.
5980*/
5981area_id
5982transfer_area(area_id id, void** _address, uint32 addressSpec, team_id target,
5983	bool kernel)
5984{
5985	area_info info;
5986	status_t status = get_area_info(id, &info);
5987	if (status != B_OK)
5988		return status;
5989
5990	if (info.team != thread_get_current_thread()->team->id)
5991		return B_PERMISSION_DENIED;
5992
5993	// We need to mark the area cloneable so the following operations work.
5994	status = set_area_protection(id, info.protection | B_CLONEABLE_AREA);
5995	if (status != B_OK)
5996		return status;
5997
5998	area_id clonedArea = vm_clone_area(target, info.name, _address,
5999		addressSpec, info.protection, REGION_NO_PRIVATE_MAP, id, kernel);
6000	if (clonedArea < 0)
6001		return clonedArea;
6002
6003	status = vm_delete_area(info.team, id, kernel);
6004	if (status != B_OK) {
6005		vm_delete_area(target, clonedArea, kernel);
6006		return status;
6007	}
6008
6009	// Now we can reset the protection to whatever it was before.
6010	set_area_protection(clonedArea, info.protection);
6011
6012	// TODO: The clonedArea is B_SHARED_AREA, which is not really desired.
6013
6014	return clonedArea;
6015}
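
/*	Illustrative usage sketch (not part of the original source): handing an
	area over to another team from kernel context. The areaID and targetTeam
	variables are hypothetical; on success the returned ID refers to the
	cloned area now owned by targetTeam.

		void* address = NULL;
		area_id transferred = transfer_area(areaID, &address, B_ANY_ADDRESS,
			targetTeam, true);
		if (transferred < 0)
			return transferred;
*/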
6016
6017
6018extern "C" area_id
6019__map_physical_memory_haiku(const char* name, phys_addr_t physicalAddress,
6020	size_t numBytes, uint32 addressSpec, uint32 protection,
6021	void** _virtualAddress)
6022{
6023	if (!arch_vm_supports_protection(protection))
6024		return B_NOT_SUPPORTED;
6025
6026	fix_protection(&protection);
6027
6028	return vm_map_physical_memory(VMAddressSpace::KernelID(), name,
6029		_virtualAddress, addressSpec, numBytes, protection, physicalAddress,
6030		false);
6031}
6032
6033
6034area_id
6035clone_area(const char* name, void** _address, uint32 addressSpec,
6036	uint32 protection, area_id source)
6037{
6038	if ((protection & B_KERNEL_PROTECTION) == 0)
6039		protection |= B_KERNEL_READ_AREA | B_KERNEL_WRITE_AREA;
6040
6041	return vm_clone_area(VMAddressSpace::KernelID(), name, _address,
6042		addressSpec, protection, REGION_NO_PRIVATE_MAP, source, true);
6043}
6044
6045
6046area_id
6047create_area_etc(team_id team, const char* name, size_t size, uint32 lock,
6048	uint32 protection, uint32 flags, uint32 guardSize,
6049	const virtual_address_restrictions* virtualAddressRestrictions,
6050	const physical_address_restrictions* physicalAddressRestrictions,
6051	void** _address)
6052{
6053	fix_protection(&protection);
6054
6055	return vm_create_anonymous_area(team, name, size, lock, protection, flags,
6056		guardSize, virtualAddressRestrictions, physicalAddressRestrictions,
6057		true, _address);
6058}
6059
6060
6061extern "C" area_id
6062__create_area_haiku(const char* name, void** _address, uint32 addressSpec,
6063	size_t size, uint32 lock, uint32 protection)
6064{
6065	fix_protection(&protection);
6066
6067	virtual_address_restrictions virtualRestrictions = {};
6068	virtualRestrictions.address = *_address;
6069	virtualRestrictions.address_specification = addressSpec;
6070	physical_address_restrictions physicalRestrictions = {};
6071	return vm_create_anonymous_area(VMAddressSpace::KernelID(), name, size,
6072		lock, protection, 0, 0, &virtualRestrictions, &physicalRestrictions,
6073		true, _address);
6074}
6075
6076
6077status_t
6078delete_area(area_id area)
6079{
6080	return vm_delete_area(VMAddressSpace::KernelID(), area, true);
6081}
6082
6083
6084//	#pragma mark - Userland syscalls
6085
6086
6087status_t
6088_user_reserve_address_range(addr_t* userAddress, uint32 addressSpec,
6089	addr_t size)
6090{
6091	// filter out some unavailable values (for userland)
6092	switch (addressSpec) {
6093		case B_ANY_KERNEL_ADDRESS:
6094		case B_ANY_KERNEL_BLOCK_ADDRESS:
6095			return B_BAD_VALUE;
6096	}
6097
6098	addr_t address;
6099
6100	if (!IS_USER_ADDRESS(userAddress)
6101		|| user_memcpy(&address, userAddress, sizeof(address)) != B_OK)
6102		return B_BAD_ADDRESS;
6103
6104	status_t status = vm_reserve_address_range(
6105		VMAddressSpace::CurrentID(), (void**)&address, addressSpec, size,
6106		RESERVED_AVOID_BASE);
6107	if (status != B_OK)
6108		return status;
6109
6110	if (user_memcpy(userAddress, &address, sizeof(address)) != B_OK) {
6111		vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6112			(void*)address, size);
6113		return B_BAD_ADDRESS;
6114	}
6115
6116	return B_OK;
6117}
6118
6119
6120status_t
6121_user_unreserve_address_range(addr_t address, addr_t size)
6122{
6123	return vm_unreserve_address_range(VMAddressSpace::CurrentID(),
6124		(void*)address, size);
6125}
6126
6127
6128area_id
6129_user_area_for(void* address)
6130{
6131	return vm_area_for((addr_t)address, false);
6132}
6133
6134
6135area_id
6136_user_find_area(const char* userName)
6137{
6138	char name[B_OS_NAME_LENGTH];
6139
6140	if (!IS_USER_ADDRESS(userName)
6141		|| user_strlcpy(name, userName, B_OS_NAME_LENGTH) < B_OK)
6142		return B_BAD_ADDRESS;
6143
6144	return find_area(name);
6145}
6146
6147
6148status_t
6149_user_get_area_info(area_id area, area_info* userInfo)
6150{
6151	if (!IS_USER_ADDRESS(userInfo))
6152		return B_BAD_ADDRESS;
6153
6154	area_info info;
6155	status_t status = get_area_info(area, &info);
6156	if (status < B_OK)
6157		return status;
6158
6159	// TODO: do we want to prevent userland from seeing kernel protections?
6160	//info.protection &= B_USER_PROTECTION;
6161
6162	if (user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6163		return B_BAD_ADDRESS;
6164
6165	return status;
6166}
6167
6168
6169status_t
6170_user_get_next_area_info(team_id team, ssize_t* userCookie, area_info* userInfo)
6171{
6172	ssize_t cookie;
6173
6174	if (!IS_USER_ADDRESS(userCookie)
6175		|| !IS_USER_ADDRESS(userInfo)
6176		|| user_memcpy(&cookie, userCookie, sizeof(ssize_t)) < B_OK)
6177		return B_BAD_ADDRESS;
6178
6179	area_info info;
6180	status_t status = _get_next_area_info(team, &cookie, &info,
6181		sizeof(area_info));
6182	if (status != B_OK)
6183		return status;
6184
6185	//info.protection &= B_USER_PROTECTION;
6186
6187	if (user_memcpy(userCookie, &cookie, sizeof(ssize_t)) < B_OK
6188		|| user_memcpy(userInfo, &info, sizeof(area_info)) < B_OK)
6189		return B_BAD_ADDRESS;
6190
6191	return status;
6192}
6193
6194
6195status_t
6196_user_set_area_protection(area_id area, uint32 newProtection)
6197{
6198	if ((newProtection & ~B_USER_PROTECTION) != 0)
6199		return B_BAD_VALUE;
6200
6201	return vm_set_area_protection(VMAddressSpace::CurrentID(), area,
6202		newProtection, false);
6203}
6204
6205
6206status_t
6207_user_resize_area(area_id area, size_t newSize)
6208{
6209	// TODO: Since we restrict deleting areas to those owned by the team,
6210	// we should also do that for resizing (check other functions, too).
6211	return vm_resize_area(area, newSize, false);
6212}
6213
6214
6215area_id
6216_user_transfer_area(area_id area, void** userAddress, uint32 addressSpec,
6217	team_id target)
6218{
6219	// filter out some unavailable values (for userland)
6220	switch (addressSpec) {
6221		case B_ANY_KERNEL_ADDRESS:
6222		case B_ANY_KERNEL_BLOCK_ADDRESS:
6223			return B_BAD_VALUE;
6224	}
6225
6226	void* address;
6227	if (!IS_USER_ADDRESS(userAddress)
6228		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6229		return B_BAD_ADDRESS;
6230
6231	area_id newArea = transfer_area(area, &address, addressSpec, target, false);
6232	if (newArea < B_OK)
6233		return newArea;
6234
6235	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK)
6236		return B_BAD_ADDRESS;
6237
6238	return newArea;
6239}
6240
6241
6242area_id
6243_user_clone_area(const char* userName, void** userAddress, uint32 addressSpec,
6244	uint32 protection, area_id sourceArea)
6245{
6246	char name[B_OS_NAME_LENGTH];
6247	void* address;
6248
6249	// filter out some unavailable values (for userland)
6250	switch (addressSpec) {
6251		case B_ANY_KERNEL_ADDRESS:
6252		case B_ANY_KERNEL_BLOCK_ADDRESS:
6253			return B_BAD_VALUE;
6254	}
6255	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6256		return B_BAD_VALUE;
6257
6258	if (!IS_USER_ADDRESS(userName)
6259		|| !IS_USER_ADDRESS(userAddress)
6260		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6261		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6262		return B_BAD_ADDRESS;
6263
6264	fix_protection(&protection);
6265
6266	area_id clonedArea = vm_clone_area(VMAddressSpace::CurrentID(), name,
6267		&address, addressSpec, protection, REGION_NO_PRIVATE_MAP, sourceArea,
6268		false);
6269	if (clonedArea < B_OK)
6270		return clonedArea;
6271
6272	if (user_memcpy(userAddress, &address, sizeof(address)) < B_OK) {
6273		delete_area(clonedArea);
6274		return B_BAD_ADDRESS;
6275	}
6276
6277	return clonedArea;
6278}
6279
6280
6281area_id
6282_user_create_area(const char* userName, void** userAddress, uint32 addressSpec,
6283	size_t size, uint32 lock, uint32 protection)
6284{
6285	char name[B_OS_NAME_LENGTH];
6286	void* address;
6287
6288	// filter out some unavailable values (for userland)
6289	switch (addressSpec) {
6290		case B_ANY_KERNEL_ADDRESS:
6291		case B_ANY_KERNEL_BLOCK_ADDRESS:
6292			return B_BAD_VALUE;
6293	}
6294	if ((protection & ~B_USER_AREA_FLAGS) != 0)
6295		return B_BAD_VALUE;
6296
6297	if (!IS_USER_ADDRESS(userName)
6298		|| !IS_USER_ADDRESS(userAddress)
6299		|| user_strlcpy(name, userName, sizeof(name)) < B_OK
6300		|| user_memcpy(&address, userAddress, sizeof(address)) < B_OK)
6301		return B_BAD_ADDRESS;
6302
6303	if (addressSpec == B_EXACT_ADDRESS
6304		&& IS_KERNEL_ADDRESS(address))
6305		return B_BAD_VALUE;
6306
6307	if (addressSpec == B_ANY_ADDRESS)
6308		addressSpec = B_RANDOMIZED_ANY_ADDRESS;
6309	if (addressSpec == B_BASE_ADDRESS)
6310		addressSpec = B_RANDOMIZED_BASE_ADDRESS;
6311
6312	fix_protection(&protection);
6313
6314	virtual_address_restrictions virtualRestrictions = {};
6315	virtualRestrictions.address = address;
6316	virtualRestrictions.address_specification = addressSpec;
6317	physical_address_restrictions physicalRestrictions = {};
6318	area_id area = vm_create_anonymous_area(VMAddressSpace::CurrentID(), name,
6319		size, lock, protection, 0, 0, &virtualRestrictions,
6320		&physicalRestrictions, false, &address);
6321</