1/******************************************************************************
2
3  Copyright (c) 2001-2015, Intel Corporation
4  All rights reserved.
5
6  Redistribution and use in source and binary forms, with or without
7  modification, are permitted provided that the following conditions are met:
8
9   1. Redistributions of source code must retain the above copyright notice,
10      this list of conditions and the following disclaimer.
11
12   2. Redistributions in binary form must reproduce the above copyright
13      notice, this list of conditions and the following disclaimer in the
14      documentation and/or other materials provided with the distribution.
15
16   3. Neither the name of the Intel Corporation nor the names of its
17      contributors may be used to endorse or promote products derived from
18      this software without specific prior written permission.
19
20  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  POSSIBILITY OF SUCH DAMAGE.
31
32******************************************************************************/
33/*$FreeBSD: stable/11/sys/dev/e1000/if_lem.c 323292 2017-09-08 00:11:10Z marius $*/
34
35/*
36 * Uncomment the following extensions for better performance in a VM,
37 * especially if you have support in the hypervisor.
38 * See http://info.iet.unipi.it/~luigi/netmap/
39 */
40// #define BATCH_DISPATCH
41// #define NIC_SEND_COMBINING
42// #define NIC_PARAVIRT	/* enable virtio-like synchronization */
43
44#include "opt_inet.h"
45#include "opt_inet6.h"
46
47#ifdef HAVE_KERNEL_OPTION_HEADERS
48#include "opt_device_polling.h"
49#endif
50
51#include <sys/param.h>
52#include <sys/systm.h>
53#include <sys/buf_ring.h>
54#include <sys/bus.h>
55#include <sys/endian.h>
56#include <sys/kernel.h>
57#include <sys/kthread.h>
58#include <sys/malloc.h>
59#include <sys/mbuf.h>
60#include <sys/module.h>
61#include <sys/rman.h>
62#include <sys/socket.h>
63#include <sys/sockio.h>
64#include <sys/sysctl.h>
65#include <sys/taskqueue.h>
66#include <sys/eventhandler.h>
67#include <machine/bus.h>
68#include <machine/resource.h>
69
70#include <net/bpf.h>
71#include <net/ethernet.h>
72#include <net/if.h>
73#include <net/if_var.h>
74#include <net/if_arp.h>
75#include <net/if_dl.h>
76#include <net/if_media.h>
77
78#include <net/if_types.h>
79#include <net/if_vlan_var.h>
80
81#include <netinet/in_systm.h>
82#include <netinet/in.h>
83#include <netinet/if_ether.h>
84#include <netinet/ip.h>
85#include <netinet/ip6.h>
86#include <netinet/tcp.h>
87#include <netinet/udp.h>
88
89#include <machine/in_cksum.h>
90#include <dev/led/led.h>
91#include <dev/pci/pcivar.h>
92#include <dev/pci/pcireg.h>
93
94#include "e1000_api.h"
95#include "if_lem.h"
96
97/*********************************************************************
98 *  Legacy Em Driver version:
99 *********************************************************************/
100char lem_driver_version[] = "1.1.0";
101
102/*********************************************************************
103 *  PCI Device ID Table
104 *
105 *  Used by probe to select devices to load on
106 *  Last field stores an index into e1000_strings
107 *  Last entry must be all 0s
108 *
109 *  { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index }
110 *********************************************************************/
111
static em_vendor_info_t lem_vendor_info_array[] =
{
	/* Intel(R) PRO/1000 Network Connection */
	/*
	 * Last field is an index into lem_strings[]; PCI_ANY_ID in the
	 * subvendor/subdevice columns wildcards that field during probe.
	 */
	{ 0x8086, E1000_DEV_ID_82540EM,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EM_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82540EP_LP,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82541EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541ER_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_LF,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82541GI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82542,		PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82543GC_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82543GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82544EI_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544EI_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82544GC_LOM,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82545EM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545EM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82545GM_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82546EB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546EB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_COPPER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_FIBER,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_SERDES,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_PCIE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER, PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3,
						PCI_ANY_ID, PCI_ANY_ID, 0},

	{ 0x8086, E1000_DEV_ID_82547EI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547EI_MOBILE,	PCI_ANY_ID, PCI_ANY_ID, 0},
	{ 0x8086, E1000_DEV_ID_82547GI,		PCI_ANY_ID, PCI_ANY_ID, 0},
	/* required last entry: all-zero sentinel terminates the probe loop */
	{ 0, 0, 0, 0, 0}
};
162
163/*********************************************************************
164 *  Table of branding strings for all supported NICs.
165 *********************************************************************/
166
static char *lem_strings[] = {
	/* Indexed by the last field of lem_vendor_info_array entries. */
	"Intel(R) PRO/1000 Legacy Network Connection"
};
170
171/*********************************************************************
172 *  Function prototypes
173 *********************************************************************/
174static int	lem_probe(device_t);
175static int	lem_attach(device_t);
176static int	lem_detach(device_t);
177static int	lem_shutdown(device_t);
178static int	lem_suspend(device_t);
179static int	lem_resume(device_t);
180static void	lem_start(if_t);
181static void	lem_start_locked(if_t ifp);
182static int	lem_ioctl(if_t, u_long, caddr_t);
183static uint64_t	lem_get_counter(if_t, ift_counter);
184static void	lem_init(void *);
185static void	lem_init_locked(struct adapter *);
186static void	lem_stop(void *);
187static void	lem_media_status(if_t, struct ifmediareq *);
188static int	lem_media_change(if_t);
189static void	lem_identify_hardware(struct adapter *);
190static int	lem_allocate_pci_resources(struct adapter *);
191static int	lem_allocate_irq(struct adapter *adapter);
192static void	lem_free_pci_resources(struct adapter *);
193static void	lem_local_timer(void *);
194static int	lem_hardware_init(struct adapter *);
195static int	lem_setup_interface(device_t, struct adapter *);
196static void	lem_setup_transmit_structures(struct adapter *);
197static void	lem_initialize_transmit_unit(struct adapter *);
198static int	lem_setup_receive_structures(struct adapter *);
199static void	lem_initialize_receive_unit(struct adapter *);
200static void	lem_enable_intr(struct adapter *);
201static void	lem_disable_intr(struct adapter *);
202static void	lem_free_transmit_structures(struct adapter *);
203static void	lem_free_receive_structures(struct adapter *);
204static void	lem_update_stats_counters(struct adapter *);
205static void	lem_add_hw_stats(struct adapter *adapter);
206static void	lem_txeof(struct adapter *);
207static void	lem_tx_purge(struct adapter *);
208static int	lem_allocate_receive_structures(struct adapter *);
209static int	lem_allocate_transmit_structures(struct adapter *);
210static bool	lem_rxeof(struct adapter *, int, int *);
211#ifndef __NO_STRICT_ALIGNMENT
212static int	lem_fixup_rx(struct adapter *);
213#endif
214static void	lem_receive_checksum(struct adapter *, struct e1000_rx_desc *,
215		    struct mbuf *);
216static void	lem_transmit_checksum_setup(struct adapter *, struct mbuf *,
217		    u32 *, u32 *);
218static void	lem_set_promisc(struct adapter *);
219static void	lem_disable_promisc(struct adapter *);
220static void	lem_set_multi(struct adapter *);
221static void	lem_update_link_status(struct adapter *);
222static int	lem_get_buf(struct adapter *, int);
223static void	lem_register_vlan(void *, if_t, u16);
224static void	lem_unregister_vlan(void *, if_t, u16);
225static void	lem_setup_vlan_hw_support(struct adapter *);
226static int	lem_xmit(struct adapter *, struct mbuf **);
227static void	lem_smartspeed(struct adapter *);
228static int	lem_82547_fifo_workaround(struct adapter *, int);
229static void	lem_82547_update_fifo_head(struct adapter *, int);
230static int	lem_82547_tx_fifo_reset(struct adapter *);
231static void	lem_82547_move_tail(void *);
232static int	lem_dma_malloc(struct adapter *, bus_size_t,
233		    struct em_dma_alloc *, int);
234static void	lem_dma_free(struct adapter *, struct em_dma_alloc *);
235static int	lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS);
236static void	lem_print_nvm_info(struct adapter *);
237static int 	lem_is_valid_ether_addr(u8 *);
238static u32	lem_fill_descriptors (bus_addr_t address, u32 length,
239		    PDESC_ARRAY desc_array);
240static int	lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS);
241static void	lem_add_int_delay_sysctl(struct adapter *, const char *,
242		    const char *, struct em_int_delay_info *, int, int);
243static void	lem_set_flow_cntrl(struct adapter *, const char *,
244		    const char *, int *, int);
245/* Management and WOL Support */
246static void	lem_init_manageability(struct adapter *);
247static void	lem_release_manageability(struct adapter *);
248static void     lem_get_hw_control(struct adapter *);
249static void     lem_release_hw_control(struct adapter *);
250static void	lem_get_wakeup(device_t);
251static void     lem_enable_wakeup(device_t);
252static int	lem_enable_phy_wakeup(struct adapter *);
253static void	lem_led_func(void *, int);
254
255static void	lem_intr(void *);
256static int	lem_irq_fast(void *);
257static void	lem_handle_rxtx(void *context, int pending);
258static void	lem_handle_link(void *context, int pending);
259static void	lem_add_rx_process_limit(struct adapter *, const char *,
260		    const char *, int *, int);
261
262#ifdef DEVICE_POLLING
263static poll_handler_t lem_poll;
264#endif /* POLLING */
265
266/*********************************************************************
267 *  FreeBSD Device Interface Entry Points
268 *********************************************************************/
269
270static device_method_t lem_methods[] = {
271	/* Device interface */
272	DEVMETHOD(device_probe, lem_probe),
273	DEVMETHOD(device_attach, lem_attach),
274	DEVMETHOD(device_detach, lem_detach),
275	DEVMETHOD(device_shutdown, lem_shutdown),
276	DEVMETHOD(device_suspend, lem_suspend),
277	DEVMETHOD(device_resume, lem_resume),
278	DEVMETHOD_END
279};
280
281static driver_t lem_driver = {
282	"em", lem_methods, sizeof(struct adapter),
283};
284
285extern devclass_t em_devclass;
286DRIVER_MODULE(lem, pci, lem_driver, em_devclass, 0, 0);
287MODULE_DEPEND(lem, pci, 1, 1, 1);
288MODULE_DEPEND(lem, ether, 1, 1, 1);
289#ifdef DEV_NETMAP
290MODULE_DEPEND(lem, netmap, 1, 1, 1);
291#endif /* DEV_NETMAP */
292
293/*********************************************************************
294 *  Tunable default values.
295 *********************************************************************/
296
297#define EM_TICKS_TO_USECS(ticks)	((1024 * (ticks) + 500) / 1000)
298#define EM_USECS_TO_TICKS(usecs)	((1000 * (usecs) + 512) / 1024)
299
300#define MAX_INTS_PER_SEC	8000
301#define DEFAULT_ITR		(1000000000/(MAX_INTS_PER_SEC * 256))
302
303static int lem_tx_int_delay_dflt = EM_TICKS_TO_USECS(EM_TIDV);
304static int lem_rx_int_delay_dflt = EM_TICKS_TO_USECS(EM_RDTR);
305static int lem_tx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_TADV);
306static int lem_rx_abs_int_delay_dflt = EM_TICKS_TO_USECS(EM_RADV);
307/*
308 * increase lem_rxd and lem_txd to at least 2048 in netmap mode
309 * for better performance.
310 */
311static int lem_rxd = EM_DEFAULT_RXD;
312static int lem_txd = EM_DEFAULT_TXD;
313static int lem_smart_pwr_down = FALSE;
314
315/* Controls whether promiscuous also shows bad packets */
316static int lem_debug_sbp = FALSE;
317
318TUNABLE_INT("hw.em.tx_int_delay", &lem_tx_int_delay_dflt);
319TUNABLE_INT("hw.em.rx_int_delay", &lem_rx_int_delay_dflt);
320TUNABLE_INT("hw.em.tx_abs_int_delay", &lem_tx_abs_int_delay_dflt);
321TUNABLE_INT("hw.em.rx_abs_int_delay", &lem_rx_abs_int_delay_dflt);
322TUNABLE_INT("hw.em.rxd", &lem_rxd);
323TUNABLE_INT("hw.em.txd", &lem_txd);
324TUNABLE_INT("hw.em.smart_pwr_down", &lem_smart_pwr_down);
325TUNABLE_INT("hw.em.sbp", &lem_debug_sbp);
326
327/* Interrupt style - default to fast */
328static int lem_use_legacy_irq = 0;
329TUNABLE_INT("hw.em.use_legacy_irq", &lem_use_legacy_irq);
330
331/* How many packets rxeof tries to clean at a time */
332static int lem_rx_process_limit = 100;
333TUNABLE_INT("hw.em.rx_process_limit", &lem_rx_process_limit);
334
335/* Flow control setting - default to FULL */
336static int lem_fc_setting = e1000_fc_full;
337TUNABLE_INT("hw.em.fc_setting", &lem_fc_setting);
338
339/* Global used in WOL setup with multiport cards */
340static int global_quad_port_a = 0;
341
342#ifdef DEV_NETMAP	/* see ixgbe.c for details */
343#include <dev/netmap/if_lem_netmap.h>
344#endif /* DEV_NETMAP */
345
346/*********************************************************************
347 *  Device identification routine
348 *
349 *  em_probe determines if the driver should be loaded on
350 *  adapter based on PCI vendor/device id of the adapter.
351 *
352 *  return BUS_PROBE_DEFAULT on success, positive on failure
353 *********************************************************************/
354
355static int
356lem_probe(device_t dev)
357{
358	char		adapter_name[60];
359	u16		pci_vendor_id = 0;
360	u16		pci_device_id = 0;
361	u16		pci_subvendor_id = 0;
362	u16		pci_subdevice_id = 0;
363	em_vendor_info_t *ent;
364
365	INIT_DEBUGOUT("em_probe: begin");
366
367	pci_vendor_id = pci_get_vendor(dev);
368	if (pci_vendor_id != EM_VENDOR_ID)
369		return (ENXIO);
370
371	pci_device_id = pci_get_device(dev);
372	pci_subvendor_id = pci_get_subvendor(dev);
373	pci_subdevice_id = pci_get_subdevice(dev);
374
375	ent = lem_vendor_info_array;
376	while (ent->vendor_id != 0) {
377		if ((pci_vendor_id == ent->vendor_id) &&
378		    (pci_device_id == ent->device_id) &&
379
380		    ((pci_subvendor_id == ent->subvendor_id) ||
381		    (ent->subvendor_id == PCI_ANY_ID)) &&
382
383		    ((pci_subdevice_id == ent->subdevice_id) ||
384		    (ent->subdevice_id == PCI_ANY_ID))) {
385			sprintf(adapter_name, "%s %s",
386				lem_strings[ent->index],
387				lem_driver_version);
388			device_set_desc_copy(dev, adapter_name);
389			return (BUS_PROBE_DEFAULT);
390		}
391		ent++;
392	}
393
394	return (ENXIO);
395}
396
397/*********************************************************************
398 *  Device initialization routine
399 *
400 *  The attach entry point is called when the driver is being loaded.
401 *  This routine identifies the type of hardware, allocates all resources
402 *  and initializes the hardware.
403 *
404 *  return 0 on success, positive on failure
405 *********************************************************************/
406
407static int
408lem_attach(device_t dev)
409{
410	struct adapter	*adapter;
411	int		tsize, rsize;
412	int		error = 0;
413
414	INIT_DEBUGOUT("lem_attach: begin");
415
416	adapter = device_get_softc(dev);
417	adapter->dev = adapter->osdep.dev = dev;
418	EM_CORE_LOCK_INIT(adapter, device_get_nameunit(dev));
419	EM_TX_LOCK_INIT(adapter, device_get_nameunit(dev));
420	EM_RX_LOCK_INIT(adapter, device_get_nameunit(dev));
421
422	/* SYSCTL stuff */
423	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
424	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
425	    OID_AUTO, "nvm", CTLTYPE_INT|CTLFLAG_RW, adapter, 0,
426	    lem_sysctl_nvm_info, "I", "NVM Information");
427
428	callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0);
429	callout_init_mtx(&adapter->tx_fifo_timer, &adapter->tx_mtx, 0);
430
431	/* Determine hardware and mac info */
432	lem_identify_hardware(adapter);
433
434	/* Setup PCI resources */
435	if (lem_allocate_pci_resources(adapter)) {
436		device_printf(dev, "Allocation of PCI resources failed\n");
437		error = ENXIO;
438		goto err_pci;
439	}
440
441	/* Do Shared Code initialization */
442	if (e1000_setup_init_funcs(&adapter->hw, TRUE)) {
443		device_printf(dev, "Setup of Shared code failed\n");
444		error = ENXIO;
445		goto err_pci;
446	}
447
448	e1000_get_bus_info(&adapter->hw);
449
450	/* Set up some sysctls for the tunable interrupt delays */
451	lem_add_int_delay_sysctl(adapter, "rx_int_delay",
452	    "receive interrupt delay in usecs", &adapter->rx_int_delay,
453	    E1000_REGISTER(&adapter->hw, E1000_RDTR), lem_rx_int_delay_dflt);
454	lem_add_int_delay_sysctl(adapter, "tx_int_delay",
455	    "transmit interrupt delay in usecs", &adapter->tx_int_delay,
456	    E1000_REGISTER(&adapter->hw, E1000_TIDV), lem_tx_int_delay_dflt);
457	if (adapter->hw.mac.type >= e1000_82540) {
458		lem_add_int_delay_sysctl(adapter, "rx_abs_int_delay",
459		    "receive interrupt delay limit in usecs",
460		    &adapter->rx_abs_int_delay,
461		    E1000_REGISTER(&adapter->hw, E1000_RADV),
462		    lem_rx_abs_int_delay_dflt);
463		lem_add_int_delay_sysctl(adapter, "tx_abs_int_delay",
464		    "transmit interrupt delay limit in usecs",
465		    &adapter->tx_abs_int_delay,
466		    E1000_REGISTER(&adapter->hw, E1000_TADV),
467		    lem_tx_abs_int_delay_dflt);
468		lem_add_int_delay_sysctl(adapter, "itr",
469		    "interrupt delay limit in usecs/4",
470		    &adapter->tx_itr,
471		    E1000_REGISTER(&adapter->hw, E1000_ITR),
472		    DEFAULT_ITR);
473	}
474
475	/* Sysctls for limiting the amount of work done in the taskqueue */
476	lem_add_rx_process_limit(adapter, "rx_processing_limit",
477	    "max number of rx packets to process", &adapter->rx_process_limit,
478	    lem_rx_process_limit);
479
480#ifdef NIC_SEND_COMBINING
481	/* Sysctls to control mitigation */
482	lem_add_rx_process_limit(adapter, "sc_enable",
483	    "driver TDT mitigation", &adapter->sc_enable, 0);
484#endif /* NIC_SEND_COMBINING */
485#ifdef BATCH_DISPATCH
486	lem_add_rx_process_limit(adapter, "batch_enable",
487	    "driver rx batch", &adapter->batch_enable, 0);
488#endif /* BATCH_DISPATCH */
489#ifdef NIC_PARAVIRT
490	lem_add_rx_process_limit(adapter, "rx_retries",
491	    "driver rx retries", &adapter->rx_retries, 0);
492#endif /* NIC_PARAVIRT */
493
494        /* Sysctl for setting the interface flow control */
495	lem_set_flow_cntrl(adapter, "flow_control",
496	    "flow control setting",
497	    &adapter->fc_setting, lem_fc_setting);
498
499	/*
500	 * Validate number of transmit and receive descriptors. It
501	 * must not exceed hardware maximum, and must be multiple
502	 * of E1000_DBA_ALIGN.
503	 */
504	if (((lem_txd * sizeof(struct e1000_tx_desc)) % EM_DBA_ALIGN) != 0 ||
505	    (adapter->hw.mac.type >= e1000_82544 && lem_txd > EM_MAX_TXD) ||
506	    (adapter->hw.mac.type < e1000_82544 && lem_txd > EM_MAX_TXD_82543) ||
507	    (lem_txd < EM_MIN_TXD)) {
508		device_printf(dev, "Using %d TX descriptors instead of %d!\n",
509		    EM_DEFAULT_TXD, lem_txd);
510		adapter->num_tx_desc = EM_DEFAULT_TXD;
511	} else
512		adapter->num_tx_desc = lem_txd;
513	if (((lem_rxd * sizeof(struct e1000_rx_desc)) % EM_DBA_ALIGN) != 0 ||
514	    (adapter->hw.mac.type >= e1000_82544 && lem_rxd > EM_MAX_RXD) ||
515	    (adapter->hw.mac.type < e1000_82544 && lem_rxd > EM_MAX_RXD_82543) ||
516	    (lem_rxd < EM_MIN_RXD)) {
517		device_printf(dev, "Using %d RX descriptors instead of %d!\n",
518		    EM_DEFAULT_RXD, lem_rxd);
519		adapter->num_rx_desc = EM_DEFAULT_RXD;
520	} else
521		adapter->num_rx_desc = lem_rxd;
522
523	adapter->hw.mac.autoneg = DO_AUTO_NEG;
524	adapter->hw.phy.autoneg_wait_to_complete = FALSE;
525	adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
526	adapter->rx_buffer_len = 2048;
527
528	e1000_init_script_state_82541(&adapter->hw, TRUE);
529	e1000_set_tbi_compatibility_82543(&adapter->hw, TRUE);
530
531	/* Copper options */
532	if (adapter->hw.phy.media_type == e1000_media_type_copper) {
533		adapter->hw.phy.mdix = AUTO_ALL_MODES;
534		adapter->hw.phy.disable_polarity_correction = FALSE;
535		adapter->hw.phy.ms_type = EM_MASTER_SLAVE;
536	}
537
538	/*
539	 * Set the frame limits assuming
540	 * standard ethernet sized frames.
541	 */
542	adapter->max_frame_size = ETHERMTU + ETHER_HDR_LEN + ETHERNET_FCS_SIZE;
543	adapter->min_frame_size = ETH_ZLEN + ETHERNET_FCS_SIZE;
544
545	/*
546	 * This controls when hardware reports transmit completion
547	 * status.
548	 */
549	adapter->hw.mac.report_tx_early = 1;
550
551#ifdef NIC_PARAVIRT
552	device_printf(dev, "driver supports paravirt, subdev 0x%x\n",
553		adapter->hw.subsystem_device_id);
554	if (adapter->hw.subsystem_device_id == E1000_PARA_SUBDEV) {
555		uint64_t bus_addr;
556
557		device_printf(dev, "paravirt support on dev %p\n", adapter);
558		tsize = 4096; // XXX one page for the csb
559		if (lem_dma_malloc(adapter, tsize, &adapter->csb_mem, BUS_DMA_NOWAIT)) {
560			device_printf(dev, "Unable to allocate csb memory\n");
561			error = ENOMEM;
562			goto err_csb;
563		}
564		/* Setup the Base of the CSB */
565		adapter->csb = (struct paravirt_csb *)adapter->csb_mem.dma_vaddr;
566		/* force the first kick */
567		adapter->csb->host_need_txkick = 1; /* txring empty */
568		adapter->csb->guest_need_rxkick = 1; /* no rx packets */
569		bus_addr = adapter->csb_mem.dma_paddr;
570		lem_add_rx_process_limit(adapter, "csb_on",
571		    "enable paravirt.", &adapter->csb->guest_csb_on, 0);
572		lem_add_rx_process_limit(adapter, "txc_lim",
573		    "txc_lim", &adapter->csb->host_txcycles_lim, 1);
574
575		/* some stats */
576#define PA_SC(name, var, val)		\
577	lem_add_rx_process_limit(adapter, name, name, var, val)
578		PA_SC("host_need_txkick",&adapter->csb->host_need_txkick, 1);
579		PA_SC("host_rxkick_at",&adapter->csb->host_rxkick_at, ~0);
580		PA_SC("guest_need_txkick",&adapter->csb->guest_need_txkick, 0);
581		PA_SC("guest_need_rxkick",&adapter->csb->guest_need_rxkick, 1);
582		PA_SC("tdt_reg_count",&adapter->tdt_reg_count, 0);
583		PA_SC("tdt_csb_count",&adapter->tdt_csb_count, 0);
584		PA_SC("tdt_int_count",&adapter->tdt_int_count, 0);
585		PA_SC("guest_need_kick_count",&adapter->guest_need_kick_count, 0);
586		/* tell the host where the block is */
587		E1000_WRITE_REG(&adapter->hw, E1000_CSBAH,
588			(u32)(bus_addr >> 32));
589		E1000_WRITE_REG(&adapter->hw, E1000_CSBAL,
590			(u32)bus_addr);
591	}
592#endif /* NIC_PARAVIRT */
593
594	/*
595	 * It seems that the descriptor DMA engine on some PCI cards
596	 * fetches memory past the end of the last descriptor in the
597	 * ring.  These reads are problematic when VT-d (DMAR) busdma
598	 * is used.  Allocate the scratch space to avoid getting
599	 * faults from DMAR, by requesting scratch memory for one more
600	 * descriptor.
601	 */
602	tsize = roundup2((adapter->num_tx_desc + 1) *
603	    sizeof(struct e1000_tx_desc), EM_DBA_ALIGN);
604
605	/* Allocate Transmit Descriptor ring */
606	if (lem_dma_malloc(adapter, tsize, &adapter->txdma, BUS_DMA_NOWAIT)) {
607		device_printf(dev, "Unable to allocate tx_desc memory\n");
608		error = ENOMEM;
609		goto err_tx_desc;
610	}
611	adapter->tx_desc_base =
612	    (struct e1000_tx_desc *)adapter->txdma.dma_vaddr;
613
614	/*
615	 * See comment above txdma allocation for rationale behind +1.
616	 */
617	rsize = roundup2((adapter->num_rx_desc + 1) *
618	    sizeof(struct e1000_rx_desc), EM_DBA_ALIGN);
619
620	/* Allocate Receive Descriptor ring */
621	if (lem_dma_malloc(adapter, rsize, &adapter->rxdma, BUS_DMA_NOWAIT)) {
622		device_printf(dev, "Unable to allocate rx_desc memory\n");
623		error = ENOMEM;
624		goto err_rx_desc;
625	}
626	adapter->rx_desc_base =
627	    (struct e1000_rx_desc *)adapter->rxdma.dma_vaddr;
628
629	/* Allocate multicast array memory. */
630	adapter->mta = malloc(sizeof(u8) * ETH_ADDR_LEN *
631	    MAX_NUM_MULTICAST_ADDRESSES, M_DEVBUF, M_NOWAIT);
632	if (adapter->mta == NULL) {
633		device_printf(dev, "Can not allocate multicast setup array\n");
634		error = ENOMEM;
635		goto err_hw_init;
636	}
637
638	/*
639	** Start from a known state, this is
640	** important in reading the nvm and
641	** mac from that.
642	*/
643	e1000_reset_hw(&adapter->hw);
644
645	/* Make sure we have a good EEPROM before we read from it */
646	if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
647		/*
648		** Some PCI-E parts fail the first check due to
649		** the link being in sleep state, call it again,
650		** if it fails a second time its a real issue.
651		*/
652		if (e1000_validate_nvm_checksum(&adapter->hw) < 0) {
653			device_printf(dev,
654			    "The EEPROM Checksum Is Not Valid\n");
655			error = EIO;
656			goto err_hw_init;
657		}
658	}
659
660	/* Copy the permanent MAC address out of the EEPROM */
661	if (e1000_read_mac_addr(&adapter->hw) < 0) {
662		device_printf(dev, "EEPROM read error while reading MAC"
663		    " address\n");
664		error = EIO;
665		goto err_hw_init;
666	}
667
668	if (!lem_is_valid_ether_addr(adapter->hw.mac.addr)) {
669		device_printf(dev, "Invalid MAC address\n");
670		error = EIO;
671		goto err_hw_init;
672	}
673
674	/* Initialize the hardware */
675	if (lem_hardware_init(adapter)) {
676		device_printf(dev, "Unable to initialize the hardware\n");
677		error = EIO;
678		goto err_hw_init;
679	}
680
681	/* Allocate transmit descriptors and buffers */
682	if (lem_allocate_transmit_structures(adapter)) {
683		device_printf(dev, "Could not setup transmit structures\n");
684		error = ENOMEM;
685		goto err_tx_struct;
686	}
687
688	/* Allocate receive descriptors and buffers */
689	if (lem_allocate_receive_structures(adapter)) {
690		device_printf(dev, "Could not setup receive structures\n");
691		error = ENOMEM;
692		goto err_rx_struct;
693	}
694
695	/*
696	**  Do interrupt configuration
697	*/
698	error = lem_allocate_irq(adapter);
699	if (error)
700		goto err_rx_struct;
701
702	/*
703	 * Get Wake-on-Lan and Management info for later use
704	 */
705	lem_get_wakeup(dev);
706
707	/* Setup OS specific network interface */
708	if (lem_setup_interface(dev, adapter) != 0)
709		goto err_rx_struct;
710
711	/* Initialize statistics */
712	lem_update_stats_counters(adapter);
713
714	adapter->hw.mac.get_link_status = 1;
715	lem_update_link_status(adapter);
716
717	/* Indicate SOL/IDER usage */
718	if (e1000_check_reset_block(&adapter->hw))
719		device_printf(dev,
720		    "PHY reset is blocked due to SOL/IDER session.\n");
721
722	/* Do we need workaround for 82544 PCI-X adapter? */
723	if (adapter->hw.bus.type == e1000_bus_type_pcix &&
724	    adapter->hw.mac.type == e1000_82544)
725		adapter->pcix_82544 = TRUE;
726	else
727		adapter->pcix_82544 = FALSE;
728
729	/* Register for VLAN events */
730	adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
731	    lem_register_vlan, adapter, EVENTHANDLER_PRI_FIRST);
732	adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
733	    lem_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST);
734
735	lem_add_hw_stats(adapter);
736
737	/* Non-AMT based hardware can now take control from firmware */
738	if (adapter->has_manage && !adapter->has_amt)
739		lem_get_hw_control(adapter);
740
741	/* Tell the stack that the interface is not active */
742	if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_OACTIVE | IFF_DRV_RUNNING);
743
744	adapter->led_dev = led_create(lem_led_func, adapter,
745	    device_get_nameunit(dev));
746
747#ifdef DEV_NETMAP
748	lem_netmap_attach(adapter);
749#endif /* DEV_NETMAP */
750	INIT_DEBUGOUT("lem_attach: end");
751
752	return (0);
753
754err_rx_struct:
755	lem_free_transmit_structures(adapter);
756err_tx_struct:
757err_hw_init:
758	lem_release_hw_control(adapter);
759	lem_dma_free(adapter, &adapter->rxdma);
760err_rx_desc:
761	lem_dma_free(adapter, &adapter->txdma);
762err_tx_desc:
763#ifdef NIC_PARAVIRT
764	lem_dma_free(adapter, &adapter->csb_mem);
765err_csb:
766#endif /* NIC_PARAVIRT */
767
768err_pci:
769	if (adapter->ifp != (void *)NULL)
770		if_free(adapter->ifp);
771	lem_free_pci_resources(adapter);
772	free(adapter->mta, M_DEVBUF);
773	EM_TX_LOCK_DESTROY(adapter);
774	EM_RX_LOCK_DESTROY(adapter);
775	EM_CORE_LOCK_DESTROY(adapter);
776
777	return (error);
778}
779
780/*********************************************************************
781 *  Device removal routine
782 *
783 *  The detach entry point is called when the driver is being removed.
784 *  This routine stops the adapter and deallocates all the resources
785 *  that were allocated for driver operation.
786 *
787 *  return 0 on success, positive on failure
788 *********************************************************************/
789
static int
lem_detach(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;

	INIT_DEBUGOUT("em_detach: begin");

	/* Make sure VLANS are not using driver */
	if (if_vlantrunkinuse(ifp)) {
		device_printf(dev,"Vlan in use, detach first\n");
		return (EBUSY);
	}

#ifdef DEVICE_POLLING
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		ether_poll_deregister(ifp);
#endif

	if (adapter->led_dev != NULL)
		led_destroy(adapter->led_dev);

	/*
	 * Quiesce the hardware under both locks: mark the in_detach
	 * flag so concurrent paths bail out, stop the MAC, reset the
	 * PHY, and hand manageability back to firmware.
	 */
	EM_CORE_LOCK(adapter);
	EM_TX_LOCK(adapter);
	adapter->in_detach = 1;
	lem_stop(adapter);
	e1000_phy_hw_reset(&adapter->hw);

	lem_release_manageability(adapter);

	EM_TX_UNLOCK(adapter);
	EM_CORE_UNLOCK(adapter);

	/* Unregister VLAN events */
	if (adapter->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach);
	if (adapter->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach);

	/*
	 * Detach from the network stack first, then drain the callouts
	 * (which must not be running when their backing storage goes).
	 */
	ether_ifdetach(adapter->ifp);
	callout_drain(&adapter->timer);
	callout_drain(&adapter->tx_fifo_timer);

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */
	lem_free_pci_resources(adapter);
	bus_generic_detach(dev);
	if_free(ifp);

	lem_free_transmit_structures(adapter);
	lem_free_receive_structures(adapter);

	/* Free Transmit Descriptor ring */
	if (adapter->tx_desc_base) {
		lem_dma_free(adapter, &adapter->txdma);
		adapter->tx_desc_base = NULL;
	}

	/* Free Receive Descriptor ring */
	if (adapter->rx_desc_base) {
		lem_dma_free(adapter, &adapter->rxdma);
		adapter->rx_desc_base = NULL;
	}

#ifdef NIC_PARAVIRT
	if (adapter->csb) {
		lem_dma_free(adapter, &adapter->csb_mem);
		adapter->csb = NULL;
	}
#endif /* NIC_PARAVIRT */
	lem_release_hw_control(adapter);
	free(adapter->mta, M_DEVBUF);
	/* Mutexes go last, after anything that might take them. */
	EM_TX_LOCK_DESTROY(adapter);
	EM_RX_LOCK_DESTROY(adapter);
	EM_CORE_LOCK_DESTROY(adapter);

	return (0);
}
869
870/*********************************************************************
871 *
872 *  Shutdown entry point
873 *
874 **********************************************************************/
875
876static int
877lem_shutdown(device_t dev)
878{
879	return lem_suspend(dev);
880}
881
882/*
883 * Suspend/resume device methods.
884 */
885static int
886lem_suspend(device_t dev)
887{
888	struct adapter *adapter = device_get_softc(dev);
889
890	EM_CORE_LOCK(adapter);
891
892	lem_release_manageability(adapter);
893	lem_release_hw_control(adapter);
894	lem_enable_wakeup(dev);
895
896	EM_CORE_UNLOCK(adapter);
897
898	return bus_generic_suspend(dev);
899}
900
901static int
902lem_resume(device_t dev)
903{
904	struct adapter *adapter = device_get_softc(dev);
905	if_t ifp = adapter->ifp;
906
907	EM_CORE_LOCK(adapter);
908	lem_init_locked(adapter);
909	lem_init_manageability(adapter);
910	EM_CORE_UNLOCK(adapter);
911	lem_start(ifp);
912
913	return bus_generic_resume(dev);
914}
915
916
static void
lem_start_locked(if_t ifp)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct mbuf	*m_head;

	EM_TX_LOCK_ASSERT(adapter);

	/* Nothing to do unless we are running, not blocked, and have link. */
	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING|IFF_DRV_OACTIVE)) !=
	    IFF_DRV_RUNNING)
		return;
	if (!adapter->link_active)
		return;

        /*
         * Force a cleanup if number of TX descriptors
         * available hits the threshold
         */
	if (adapter->num_tx_desc_avail <= EM_TX_CLEANUP_THRESHOLD) {
		lem_txeof(adapter);
		/* Now do we at least have a minimal? */
		if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD) {
			adapter->no_tx_desc_avail1++;
			return;
		}
	}

	while (!if_sendq_empty(ifp)) {
		m_head = if_dequeue(ifp);

		if (m_head == NULL)
			break;
		/*
		 *  Encapsulation can modify our pointer, and or make it
		 *  NULL on failure.  In that event, we can't requeue.
		 */
		if (lem_xmit(adapter, &m_head)) {
			if (m_head == NULL)
				break;
			/* Requeue and stop until descriptors free up. */
			if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			if_sendq_prepend(ifp, m_head);
			break;
		}

		/* Send a copy of the frame to the BPF listener */
		if_etherbpfmtap(ifp, m_head);

		/* Set timeout in case hardware has problems transmitting. */
		adapter->watchdog_check = TRUE;
		adapter->watchdog_time = ticks;
	}
	/* Ring nearly full: mark OACTIVE so the stack stops feeding us. */
	if (adapter->num_tx_desc_avail <= EM_TX_OP_THRESHOLD)
		if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
#ifdef NIC_PARAVIRT
	/* Blocked on descriptors: ask host for a TX kick, reclaim now. */
	if (if_getdrvflags(ifp) & IFF_DRV_OACTIVE && adapter->csb &&
	    adapter->csb->guest_csb_on &&
	    !(adapter->csb->guest_need_txkick & 1))  {
		adapter->csb->guest_need_txkick = 1;
		adapter->guest_need_kick_count++;
		// XXX memory barrier
		lem_txeof(adapter); // XXX possibly clear IFF_DRV_OACTIVE
	}
#endif /* NIC_PARAVIRT */

	return;
}
983
984static void
985lem_start(if_t ifp)
986{
987	struct adapter *adapter = if_getsoftc(ifp);
988
989	EM_TX_LOCK(adapter);
990	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
991		lem_start_locked(ifp);
992	EM_TX_UNLOCK(adapter);
993}
994
995/*********************************************************************
996 *  Ioctl entry point
997 *
 *  lem_ioctl is called when the user wants to configure the
 *  interface.
1000 *
1001 *  return 0 on success, positive on failure
1002 **********************************************************************/
1003
static int
lem_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct adapter	*adapter = if_getsoftc(ifp);
	struct ifreq	*ifr = (struct ifreq *)data;
#if defined(INET) || defined(INET6)
	struct ifaddr	*ifa = (struct ifaddr *)data;
#endif
	bool		avoid_reset = FALSE;
	int		error = 0;

	/* Detach in progress: quietly refuse further configuration. */
	if (adapter->in_detach)
		return (error);

	switch (command) {
	case SIOCSIFADDR:
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			avoid_reset = TRUE;
#endif
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6)
			avoid_reset = TRUE;
#endif
		/*
		** Calling init results in link renegotiation,
		** so we avoid doing it when possible.
		*/
		if (avoid_reset) {
			if_setflagbits(ifp, IFF_UP, 0);
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
				lem_init(adapter);
#ifdef INET
			if (!(if_getflags(ifp) & IFF_NOARP))
				arp_ifinit(ifp, ifa);
#endif
		} else
			error = ether_ioctl(ifp, command, data);
		break;
	case SIOCSIFMTU:
	    {
		int max_frame_size;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFMTU (Set Interface MTU)");

		EM_CORE_LOCK(adapter);
		/* Only the 82542 is limited to standard frames here. */
		switch (adapter->hw.mac.type) {
		case e1000_82542:
			max_frame_size = ETHER_MAX_LEN;
			break;
		default:
			max_frame_size = MAX_JUMBO_FRAME_SIZE;
		}
		if (ifr->ifr_mtu > max_frame_size - ETHER_HDR_LEN -
		    ETHER_CRC_LEN) {
			EM_CORE_UNLOCK(adapter);
			error = EINVAL;
			break;
		}

		if_setmtu(ifp, ifr->ifr_mtu);
		adapter->max_frame_size =
		    if_getmtu(ifp) + ETHER_HDR_LEN + ETHER_CRC_LEN;
		/* Re-init so buffer sizing matches the new frame size. */
		if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			lem_init_locked(adapter);
		EM_CORE_UNLOCK(adapter);
		break;
	    }
	case SIOCSIFFLAGS:
		IOCTL_DEBUGOUT("ioctl rcv'd:\
		    SIOCSIFFLAGS (Set Interface Flags)");
		EM_CORE_LOCK(adapter);
		if (if_getflags(ifp) & IFF_UP) {
			if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
				/*
				 * Already running: only PROMISC/ALLMULTI
				 * transitions need to be re-programmed.
				 */
				if ((if_getflags(ifp) ^ adapter->if_flags) &
				    (IFF_PROMISC | IFF_ALLMULTI)) {
					lem_disable_promisc(adapter);
					lem_set_promisc(adapter);
				}
			} else
				lem_init_locked(adapter);
		} else
			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
				EM_TX_LOCK(adapter);
				lem_stop(adapter);
				EM_TX_UNLOCK(adapter);
			}
		adapter->if_flags = if_getflags(ifp);
		EM_CORE_UNLOCK(adapter);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		IOCTL_DEBUGOUT("ioctl rcv'd: SIOC(ADD|DEL)MULTI");
		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
			EM_CORE_LOCK(adapter);
			lem_disable_intr(adapter);
			lem_set_multi(adapter);
			if (adapter->hw.mac.type == e1000_82542 &&
	    		    adapter->hw.revision_id == E1000_REVISION_2) {
				lem_initialize_receive_unit(adapter);
			}
#ifdef DEVICE_POLLING
			if (!(if_getcapenable(ifp) & IFCAP_POLLING))
#endif
				lem_enable_intr(adapter);
			EM_CORE_UNLOCK(adapter);
		}
		break;
	case SIOCSIFMEDIA:
		/* Check SOL/IDER usage */
		EM_CORE_LOCK(adapter);
		if (e1000_check_reset_block(&adapter->hw)) {
			EM_CORE_UNLOCK(adapter);
			device_printf(adapter->dev, "Media change is"
			    " blocked due to SOL/IDER session.\n");
			break;
		}
		EM_CORE_UNLOCK(adapter);
		/* FALLTHROUGH */
	case SIOCGIFMEDIA:
		IOCTL_DEBUGOUT("ioctl rcv'd: \
		    SIOCxIFMEDIA (Get/Set Interface Media)");
		error = ifmedia_ioctl(ifp, ifr, &adapter->media, command);
		break;
	case SIOCSIFCAP:
	    {
		int mask, reinit;

		IOCTL_DEBUGOUT("ioctl rcv'd: SIOCSIFCAP (Set Capabilities)");
		reinit = 0;
		mask = ifr->ifr_reqcap ^ if_getcapenable(ifp);
#ifdef DEVICE_POLLING
		if (mask & IFCAP_POLLING) {
			if (ifr->ifr_reqcap & IFCAP_POLLING) {
				error = ether_poll_register(lem_poll, ifp);
				if (error)
					return (error);
				EM_CORE_LOCK(adapter);
				lem_disable_intr(adapter);
				if_setcapenablebit(ifp, IFCAP_POLLING, 0);
				EM_CORE_UNLOCK(adapter);
			} else {
				error = ether_poll_deregister(ifp);
				/* Enable interrupt even in error case */
				EM_CORE_LOCK(adapter);
				lem_enable_intr(adapter);
				if_setcapenablebit(ifp, 0, IFCAP_POLLING);
				EM_CORE_UNLOCK(adapter);
			}
		}
#endif
		if (mask & IFCAP_HWCSUM) {
			if_togglecapenable(ifp, IFCAP_HWCSUM);
			reinit = 1;
		}
		if (mask & IFCAP_VLAN_HWTAGGING) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
			reinit = 1;
		}
		if ((mask & IFCAP_WOL) &&
		    (if_getcapabilities(ifp) & IFCAP_WOL) != 0) {
			if (mask & IFCAP_WOL_MCAST)
				if_togglecapenable(ifp, IFCAP_WOL_MCAST);
			if (mask & IFCAP_WOL_MAGIC)
				if_togglecapenable(ifp, IFCAP_WOL_MAGIC);
		}
		/* Checksum/VLAN changes require re-programming the HW. */
		if (reinit && (if_getdrvflags(ifp) & IFF_DRV_RUNNING))
			lem_init(adapter);
		if_vlancap(ifp);
		break;
	    }

	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}
1182
1183
1184/*********************************************************************
1185 *  Init entry point
1186 *
1187 *  This routine is used in two ways. It is used by the stack as
1188 *  init entry point in network interface structure. It is also used
1189 *  by the driver as a hw/sw initialization routine to get to a
1190 *  consistent state.
1191 *
 *  This routine returns no value; hardware setup failures are logged.
1193 **********************************************************************/
1194
static void
lem_init_locked(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	device_t	dev = adapter->dev;
	u32		pba;

	INIT_DEBUGOUT("lem_init: begin");

	EM_CORE_LOCK_ASSERT(adapter);

	EM_TX_LOCK(adapter);
	lem_stop(adapter);
	EM_TX_UNLOCK(adapter);

	/*
	 * Packet Buffer Allocation (PBA)
	 * Writing PBA sets the receive portion of the buffer
	 * the remainder is used for the transmit buffer.
	 *
	 * Devices before the 82547 had a Packet Buffer of 64K.
	 *   Default allocation: PBA=48K for Rx, leaving 16K for Tx.
	 * After the 82547 the buffer was reduced to 40K.
	 *   Default allocation: PBA=30K for Rx, leaving 10K for Tx.
	 *   Note: default does not leave enough room for Jumbo Frame >10k.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82547:
	case e1000_82547_rev_2: /* 82547: Total Packet Buffer is 40K */
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_22K; /* 22K for Rx, 18K for Tx */
		else
			pba = E1000_PBA_30K; /* 30K for Rx, 10K for Tx */
		/* State for the 82547 half-duplex FIFO workaround. */
		adapter->tx_fifo_head = 0;
		adapter->tx_head_addr = pba << EM_TX_HEAD_ADDR_SHIFT;
		adapter->tx_fifo_size =
		    (E1000_PBA_40K - pba) << EM_PBA_BYTES_SHIFT;
		break;
	default:
		/* Devices before 82547 had a Packet Buffer of 64K.   */
		if (adapter->max_frame_size > 8192)
			pba = E1000_PBA_40K; /* 40K for Rx, 24K for Tx */
		else
			pba = E1000_PBA_48K; /* 48K for Rx, 16K for Tx */
	}

	INIT_DEBUGOUT1("lem_init: pba=%dK",pba);
	E1000_WRITE_REG(&adapter->hw, E1000_PBA, pba);

	/* Get the latest mac address, User can use a LAA */
        bcopy(if_getlladdr(adapter->ifp), adapter->hw.mac.addr,
              ETHER_ADDR_LEN);

	/* Put the address into the Receive Address Array */
	e1000_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);

	/* Initialize the hardware */
	if (lem_hardware_init(adapter)) {
		device_printf(dev, "Unable to initialize the hardware\n");
		return;
	}
	lem_update_link_status(adapter);

	/* Setup VLAN support, basic and offload if available */
	E1000_WRITE_REG(&adapter->hw, E1000_VET, ETHERTYPE_VLAN);

	/* Set hardware offload abilities */
	if_clearhwassist(ifp);
	if (adapter->hw.mac.type >= e1000_82543) {
		if (if_getcapenable(ifp) & IFCAP_TXCSUM)
			if_sethwassistbits(ifp, CSUM_TCP | CSUM_UDP, 0);
	}

	/* Configure for OS presence */
	lem_init_manageability(adapter);

	/* Prepare transmit descriptors and buffers */
	lem_setup_transmit_structures(adapter);
	lem_initialize_transmit_unit(adapter);

	/* Setup Multicast table */
	lem_set_multi(adapter);

	/* Prepare receive descriptors and buffers */
	if (lem_setup_receive_structures(adapter)) {
		device_printf(dev, "Could not setup receive structures\n");
		EM_TX_LOCK(adapter);
		lem_stop(adapter);
		EM_TX_UNLOCK(adapter);
		return;
	}
	lem_initialize_receive_unit(adapter);

	/* Use real VLAN Filter support? */
	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
			/* Use real VLAN Filter support */
			lem_setup_vlan_hw_support(adapter);
		else {
			u32 ctrl;
			ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
			ctrl |= E1000_CTRL_VME;
			E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
                }
	}

	/* Don't lose promiscuous settings */
	lem_set_promisc(adapter);

	/* Mark the interface running and clear the stalled marker. */
	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);

	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
	e1000_clear_hw_cntrs_base_generic(&adapter->hw);

#ifdef DEVICE_POLLING
	/*
	 * Only enable interrupts if we are not polling, make sure
	 * they are off otherwise.
	 */
	if (if_getcapenable(ifp) & IFCAP_POLLING)
		lem_disable_intr(adapter);
	else
#endif /* DEVICE_POLLING */
		lem_enable_intr(adapter);

	/* AMT based hardware can now take control from firmware */
	if (adapter->has_manage && adapter->has_amt)
		lem_get_hw_control(adapter);
}
1324
/*
 * Public init entry point: acquire the core lock and run the
 * locked initialization routine.
 */
static void
lem_init(void *arg)
{
	struct adapter *sc = arg;

	EM_CORE_LOCK(sc);
	lem_init_locked(sc);
	EM_CORE_UNLOCK(sc);
}
1334
1335
1336#ifdef DEVICE_POLLING
1337/*********************************************************************
1338 *
1339 *  Legacy polling routine
1340 *
1341 *********************************************************************/
static int
lem_poll(if_t ifp, enum poll_cmd cmd, int count)
{
	struct adapter *adapter = if_getsoftc(ifp);
	u32		reg_icr, rx_done = 0;

	EM_CORE_LOCK(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
		EM_CORE_UNLOCK(adapter);
		return (rx_done);
	}

	if (cmd == POLL_AND_CHECK_STATUS) {
		/* Reading ICR clears it; pick up link state changes here. */
		reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
		if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
			callout_stop(&adapter->timer);
			adapter->hw.mac.get_link_status = 1;
			lem_update_link_status(adapter);
			callout_reset(&adapter->timer, hz,
			    lem_local_timer, adapter);
		}
	}
	EM_CORE_UNLOCK(adapter);

	/* Process at most "count" received packets. */
	lem_rxeof(adapter, count, &rx_done);

	/* Reclaim completed transmits and restart the TX path. */
	EM_TX_LOCK(adapter);
	lem_txeof(adapter);
	if(!if_sendq_empty(ifp))
		lem_start_locked(ifp);
	EM_TX_UNLOCK(adapter);
	return (rx_done);
}
1375#endif /* DEVICE_POLLING */
1376
1377/*********************************************************************
1378 *
1379 *  Legacy Interrupt Service routine
1380 *
1381 *********************************************************************/
static void
lem_intr(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;
	u32		reg_icr;


	/* Ignore if polling owns the device or we are not running. */
	if ((if_getcapenable(ifp) & IFCAP_POLLING) ||
	    ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0))
		return;

	EM_CORE_LOCK(adapter);
	/* Reading ICR acknowledges and clears the pending causes. */
	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
	if (reg_icr & E1000_ICR_RXO)
		adapter->rx_overruns++;

	/* All-ones means the device is gone; zero means not our interrupt. */
	if ((reg_icr == 0xffffffff) || (reg_icr == 0)) {
		EM_CORE_UNLOCK(adapter);
		return;
	}

	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
		callout_stop(&adapter->timer);
		adapter->hw.mac.get_link_status = 1;
		lem_update_link_status(adapter);
		/* Deal with TX cruft when link lost */
		lem_tx_purge(adapter);
		callout_reset(&adapter->timer, hz,
		    lem_local_timer, adapter);
		EM_CORE_UNLOCK(adapter);
		return;
	}

	EM_CORE_UNLOCK(adapter);
	/* -1: no packet-count limit for the legacy interrupt path. */
	lem_rxeof(adapter, -1, NULL);

	EM_TX_LOCK(adapter);
	lem_txeof(adapter);
	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
	    (!if_sendq_empty(ifp)))
		lem_start_locked(ifp);
	EM_TX_UNLOCK(adapter);
	return;
}
1427
1428
1429static void
1430lem_handle_link(void *context, int pending)
1431{
1432	struct adapter	*adapter = context;
1433	if_t ifp = adapter->ifp;
1434
1435	if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
1436		return;
1437
1438	EM_CORE_LOCK(adapter);
1439	callout_stop(&adapter->timer);
1440	lem_update_link_status(adapter);
1441	/* Deal with TX cruft when link lost */
1442	lem_tx_purge(adapter);
1443	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
1444	EM_CORE_UNLOCK(adapter);
1445}
1446
1447
1448/* Combined RX/TX handler, used by Legacy and MSI */
static void
lem_handle_rxtx(void *context, int pending)
{
	struct adapter	*adapter = context;
	if_t ifp = adapter->ifp;


	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		/* more != 0 means RX work remained beyond the limit. */
		bool more = lem_rxeof(adapter, adapter->rx_process_limit, NULL);
		EM_TX_LOCK(adapter);
		lem_txeof(adapter);
		if(!if_sendq_empty(ifp))
			lem_start_locked(ifp);
		EM_TX_UNLOCK(adapter);
		if (more) {
			/* Requeue ourselves; interrupts stay masked. */
			taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
			return;
		}
	}

	/* All caught up: allow the hardware to interrupt us again. */
	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
		lem_enable_intr(adapter);
}
1472
1473/*********************************************************************
1474 *
1475 *  Fast Legacy/MSI Combined Interrupt Service routine
1476 *
1477 *********************************************************************/
1478static int
1479lem_irq_fast(void *arg)
1480{
1481	struct adapter	*adapter = arg;
1482	if_t ifp;
1483	u32		reg_icr;
1484
1485	ifp = adapter->ifp;
1486
1487	reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR);
1488
1489	/* Hot eject?  */
1490	if (reg_icr == 0xffffffff)
1491		return FILTER_STRAY;
1492
1493	/* Definitely not our interrupt.  */
1494	if (reg_icr == 0x0)
1495		return FILTER_STRAY;
1496
1497	/*
1498	 * Mask interrupts until the taskqueue is finished running.  This is
1499	 * cheap, just assume that it is needed.  This also works around the
1500	 * MSI message reordering errata on certain systems.
1501	 */
1502	lem_disable_intr(adapter);
1503	taskqueue_enqueue(adapter->tq, &adapter->rxtx_task);
1504
1505	/* Link status change */
1506	if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
1507		adapter->hw.mac.get_link_status = 1;
1508		taskqueue_enqueue(taskqueue_fast, &adapter->link_task);
1509	}
1510
1511	if (reg_icr & E1000_ICR_RXO)
1512		adapter->rx_overruns++;
1513	return FILTER_HANDLED;
1514}
1515
1516
1517/*********************************************************************
1518 *
1519 *  Media Ioctl callback
1520 *
1521 *  This routine is called whenever the user queries the status of
1522 *  the interface using ifconfig.
1523 *
1524 **********************************************************************/
1525static void
1526lem_media_status(if_t ifp, struct ifmediareq *ifmr)
1527{
1528	struct adapter *adapter = if_getsoftc(ifp);
1529	u_char fiber_type = IFM_1000_SX;
1530
1531	INIT_DEBUGOUT("lem_media_status: begin");
1532
1533	EM_CORE_LOCK(adapter);
1534	lem_update_link_status(adapter);
1535
1536	ifmr->ifm_status = IFM_AVALID;
1537	ifmr->ifm_active = IFM_ETHER;
1538
1539	if (!adapter->link_active) {
1540		EM_CORE_UNLOCK(adapter);
1541		return;
1542	}
1543
1544	ifmr->ifm_status |= IFM_ACTIVE;
1545
1546	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
1547	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
1548		if (adapter->hw.mac.type == e1000_82545)
1549			fiber_type = IFM_1000_LX;
1550		ifmr->ifm_active |= fiber_type | IFM_FDX;
1551	} else {
1552		switch (adapter->link_speed) {
1553		case 10:
1554			ifmr->ifm_active |= IFM_10_T;
1555			break;
1556		case 100:
1557			ifmr->ifm_active |= IFM_100_TX;
1558			break;
1559		case 1000:
1560			ifmr->ifm_active |= IFM_1000_T;
1561			break;
1562		}
1563		if (adapter->link_duplex == FULL_DUPLEX)
1564			ifmr->ifm_active |= IFM_FDX;
1565		else
1566			ifmr->ifm_active |= IFM_HDX;
1567	}
1568	EM_CORE_UNLOCK(adapter);
1569}
1570
1571/*********************************************************************
1572 *
1573 *  Media Ioctl callback
1574 *
1575 *  This routine is called when the user changes speed/duplex using
 *  media/mediaopt options with ifconfig.
1577 *
1578 **********************************************************************/
static int
lem_media_change(if_t ifp)
{
	struct adapter *adapter = if_getsoftc(ifp);
	struct ifmedia  *ifm = &adapter->media;

	INIT_DEBUGOUT("lem_media_change: begin");

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);

	EM_CORE_LOCK(adapter);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = AUTONEG_ADV_DEFAULT;
		break;
	case IFM_1000_LX:
	case IFM_1000_SX:
	case IFM_1000_T:
		/* Gigabit always autonegotiates; only advertise 1000FD. */
		adapter->hw.mac.autoneg = DO_AUTO_NEG;
		adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
		break;
	case IFM_100_TX:
		/* Forced 100 Mb: duplex selected by the media option. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_100_HALF;
		break;
	case IFM_10_T:
		/* Forced 10 Mb: duplex selected by the media option. */
		adapter->hw.mac.autoneg = FALSE;
		adapter->hw.phy.autoneg_advertised = 0;
		if ((ifm->ifm_media & IFM_GMASK) == IFM_FDX)
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_FULL;
		else
			adapter->hw.mac.forced_speed_duplex = ADVERTISE_10_HALF;
		break;
	default:
		/* Unsupported media is reported, but we still re-init below. */
		device_printf(adapter->dev, "Unsupported media type\n");
	}

	/* Re-initialize so the new media settings take effect. */
	lem_init_locked(adapter);
	EM_CORE_UNLOCK(adapter);

	return (0);
}
1627
1628/*********************************************************************
1629 *
1630 *  This routine maps the mbufs to tx descriptors.
1631 *
1632 *  return 0 on success, positive on failure
1633 **********************************************************************/
1634
static int
lem_xmit(struct adapter *adapter, struct mbuf **m_headp)
{
	bus_dma_segment_t	segs[EM_MAX_SCATTER];
	bus_dmamap_t		map;
	struct em_buffer	*tx_buffer, *tx_buffer_mapped;
	struct e1000_tx_desc	*ctxd = NULL;
	struct mbuf		*m_head;
	u32			txd_upper, txd_lower, txd_used, txd_saved;
	int			error, nsegs, i, j, first, last = 0;

	m_head = *m_headp;
	txd_upper = txd_lower = txd_used = txd_saved = 0;

	/*
	** When doing checksum offload, it is critical to
	** make sure the first mbuf has more than header,
	** because that routine expects data to be present.
	*/
	if ((m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) &&
	    (m_head->m_len < ETHER_HDR_LEN + sizeof(struct ip))) {
		m_head = m_pullup(m_head, ETHER_HDR_LEN + sizeof(struct ip));
		*m_headp = m_head;
		if (m_head == NULL)
			return (ENOBUFS);
	}

	/*
	 * Map the packet for DMA
	 *
	 * Capture the first descriptor index,
	 * this descriptor will have the index
	 * of the EOP which is the only one that
	 * now gets a DONE bit writeback.
	 */
	first = adapter->next_avail_tx_desc;
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer_mapped = tx_buffer;
	map = tx_buffer->map;

	error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	/*
	 * There are two types of errors we can (try) to handle:
	 * - EFBIG means the mbuf chain was too long and bus_dma ran
	 *   out of segments.  Defragment the mbuf chain and try again.
	 * - ENOMEM means bus_dma could not obtain enough bounce buffers
	 *   at this point in time.  Defer sending and try again later.
	 * All other errors, in particular EINVAL, are fatal and prevent the
	 * mbuf chain from ever going through.  Drop it and report error.
	 */
	if (error == EFBIG) {
		struct mbuf *m;

		m = m_collapse(*m_headp, M_NOWAIT, EM_MAX_SCATTER);
		if (m == NULL) {
			adapter->mbuf_defrag_failed++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (ENOBUFS);
		}
		*m_headp = m;

		/* Try it again */
		error = bus_dmamap_load_mbuf_sg(adapter->txtag, map,
		    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

		if (error) {
			adapter->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	} else if (error != 0) {
		/*
		 * NOTE(review): *m_headp is left intact here, so the
		 * caller (lem_start_locked) requeues rather than drops it.
		 */
		adapter->no_tx_dma_setup++;
		return (error);
	}

	/* Need room for all segments plus slack, else back off. */
        if (adapter->num_tx_desc_avail < (nsegs + 2)) {
                adapter->no_tx_desc_avail2++;
		bus_dmamap_unload(adapter->txtag, map);
		return (ENOBUFS);
        }
	m_head = *m_headp;

	/* Do hardware assists */
	if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD)
		lem_transmit_checksum_setup(adapter,  m_head,
		    &txd_upper, &txd_lower);

	i = adapter->next_avail_tx_desc;
	if (adapter->pcix_82544)
		txd_saved = i;

	/* Set up our transmit descriptors */
	for (j = 0; j < nsegs; j++) {
		bus_size_t seg_len;
		bus_addr_t seg_addr;
		/* If adapter is 82544 and on PCIX bus */
		if(adapter->pcix_82544) {
			DESC_ARRAY	desc_array;
			u32		array_elements, counter;
			/*
			 * Check the Address and Length combination and
			 * split the data accordingly
			 */
			array_elements = lem_fill_descriptors(segs[j].ds_addr,
			    segs[j].ds_len, &desc_array);
			for (counter = 0; counter < array_elements; counter++) {
				if (txd_used == adapter->num_tx_desc_avail) {
					/* Out of room: roll back and fail. */
					adapter->next_avail_tx_desc = txd_saved;
					adapter->no_tx_desc_avail2++;
					bus_dmamap_unload(adapter->txtag, map);
					return (ENOBUFS);
				}
				tx_buffer = &adapter->tx_buffer_area[i];
				ctxd = &adapter->tx_desc_base[i];
				ctxd->buffer_addr = htole64(
				    desc_array.descriptor[counter].address);
				ctxd->lower.data = htole32(
				    (adapter->txd_cmd | txd_lower | (u16)
				    desc_array.descriptor[counter].length));
				ctxd->upper.data =
				    htole32((txd_upper));
				last = i;
				if (++i == adapter->num_tx_desc)
                                         i = 0;
				tx_buffer->m_head = NULL;
				tx_buffer->next_eop = -1;
				txd_used++;
                        }
		} else {
			tx_buffer = &adapter->tx_buffer_area[i];
			ctxd = &adapter->tx_desc_base[i];
			seg_addr = segs[j].ds_addr;
			seg_len  = segs[j].ds_len;
			ctxd->buffer_addr = htole64(seg_addr);
			ctxd->lower.data = htole32(
			adapter->txd_cmd | txd_lower | seg_len);
			ctxd->upper.data =
			    htole32(txd_upper);
			last = i;
			if (++i == adapter->num_tx_desc)
				i = 0;
			tx_buffer->m_head = NULL;
			tx_buffer->next_eop = -1;
		}
	}

	adapter->next_avail_tx_desc = i;

	if (adapter->pcix_82544)
		adapter->num_tx_desc_avail -= txd_used;
	else
		adapter->num_tx_desc_avail -= nsegs;

	if (m_head->m_flags & M_VLANTAG) {
		/* Set the vlan id. */
		ctxd->upper.fields.special =
		    htole16(m_head->m_pkthdr.ether_vtag);
                /* Tell hardware to add tag */
                ctxd->lower.data |= htole32(E1000_TXD_CMD_VLE);
        }

        tx_buffer->m_head = m_head;
	/* Swap maps so the EOP buffer owns the loaded DMA map. */
	tx_buffer_mapped->map = tx_buffer->map;
	tx_buffer->map = map;
        bus_dmamap_sync(adapter->txtag, map, BUS_DMASYNC_PREWRITE);

        /*
         * Last Descriptor of Packet
	 * needs End Of Packet (EOP)
	 * and Report Status (RS)
         */
        ctxd->lower.data |=
	    htole32(E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS);
	/*
	 * Keep track in the first buffer which
	 * descriptor will be written back
	 */
	tx_buffer = &adapter->tx_buffer_area[first];
	tx_buffer->next_eop = last;
	adapter->watchdog_time = ticks;

	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the E1000
	 * that this frame is available to transmit.
	 */
	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

#ifdef NIC_PARAVIRT
	if (adapter->csb) {
		adapter->csb->guest_tdt = i;
		/* XXX memory barrier ? */
 		if (adapter->csb->guest_csb_on &&
		    !(adapter->csb->host_need_txkick & 1)) {
			/* XXX maybe useless
			 * clean the ring. maybe do it before ?
			 * maybe a little bit of histeresys ?
			 */
			if (adapter->num_tx_desc_avail <= 64) {// XXX
				lem_txeof(adapter);
			}
			return (0);
		}
	}
#endif /* NIC_PARAVIRT */

#ifdef NIC_SEND_COMBINING
	if (adapter->sc_enable) {
		if (adapter->shadow_tdt & MIT_PENDING_INT) {
			/* signal intr and data pending */
			adapter->shadow_tdt = MIT_PENDING_TDT | (i & 0xffff);
			return (0);
		} else {
			adapter->shadow_tdt = MIT_PENDING_INT;
		}
	}
#endif /* NIC_SEND_COMBINING */

	/* 82547 half-duplex needs the FIFO-workaround tail update. */
	if (adapter->hw.mac.type == e1000_82547 &&
	    adapter->link_duplex == HALF_DUPLEX)
		lem_82547_move_tail(adapter);
	else {
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), i);
		if (adapter->hw.mac.type == e1000_82547)
			lem_82547_update_fifo_head(adapter,
			    m_head->m_pkthdr.len);
	}

	return (0);
}
1869
1870/*********************************************************************
1871 *
1872 * 82547 workaround to avoid controller hang in half-duplex environment.
1873 * The workaround is to avoid queuing a large packet that would span
1874 * the internal Tx FIFO ring boundary. We need to reset the FIFO pointers
1875 * in this case. We do that only when FIFO is quiescent.
1876 *
1877 **********************************************************************/
static void
lem_82547_move_tail(void *arg)
{
	struct adapter *adapter = arg;
	struct e1000_tx_desc *tx_desc;
	u16	hw_tdt, sw_tdt, length = 0;
	bool	eop = 0;

	EM_TX_LOCK_ASSERT(adapter);

	/* Walk descriptors the hardware has not yet been told about. */
	hw_tdt = E1000_READ_REG(&adapter->hw, E1000_TDT(0));
	sw_tdt = adapter->next_avail_tx_desc;

	while (hw_tdt != sw_tdt) {
		tx_desc = &adapter->tx_desc_base[hw_tdt];
		length += tx_desc->lower.flags.length;
		eop = tx_desc->lower.data & E1000_TXD_CMD_EOP;
		if (++hw_tdt == adapter->num_tx_desc)
			hw_tdt = 0;

		if (eop) {
			/*
			 * Whole frame accumulated: only expose it to the
			 * hardware if it will not wrap the internal TX
			 * FIFO; otherwise retry from a 1-tick callout.
			 */
			if (lem_82547_fifo_workaround(adapter, length)) {
				adapter->tx_fifo_wrk_cnt++;
				callout_reset(&adapter->tx_fifo_timer, 1,
					lem_82547_move_tail, adapter);
				break;
			}
			E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), hw_tdt);
			lem_82547_update_fifo_head(adapter, length);
			length = 0;
		}
	}
}
1911
1912static int
1913lem_82547_fifo_workaround(struct adapter *adapter, int len)
1914{
1915	int fifo_space, fifo_pkt_len;
1916
1917	fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1918
1919	if (adapter->link_duplex == HALF_DUPLEX) {
1920		fifo_space = adapter->tx_fifo_size - adapter->tx_fifo_head;
1921
1922		if (fifo_pkt_len >= (EM_82547_PKT_THRESH + fifo_space)) {
1923			if (lem_82547_tx_fifo_reset(adapter))
1924				return (0);
1925			else
1926				return (1);
1927		}
1928	}
1929
1930	return (0);
1931}
1932
1933static void
1934lem_82547_update_fifo_head(struct adapter *adapter, int len)
1935{
1936	int fifo_pkt_len = roundup2(len + EM_FIFO_HDR, EM_FIFO_HDR);
1937
1938	/* tx_fifo_head is always 16 byte aligned */
1939	adapter->tx_fifo_head += fifo_pkt_len;
1940	if (adapter->tx_fifo_head >= adapter->tx_fifo_size) {
1941		adapter->tx_fifo_head -= adapter->tx_fifo_size;
1942	}
1943}
1944
1945
/*
 * Reset the 82547 internal Tx FIFO pointers, but only when the transmit
 * path is completely quiescent: descriptor ring empty (TDT == TDH), FIFO
 * tail/head and saved tail/head pairs equal, and no packets pending in the
 * FIFO (TDFPC == 0). Returns TRUE if the reset was performed.
 */
static int
lem_82547_tx_fifo_reset(struct adapter *adapter)
{
	u32 tctl;

	if ((E1000_READ_REG(&adapter->hw, E1000_TDT(0)) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDH(0))) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFT) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFH)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFTS) ==
	    E1000_READ_REG(&adapter->hw, E1000_TDFHS)) &&
	    (E1000_READ_REG(&adapter->hw, E1000_TDFPC) == 0)) {
		/* Disable TX unit */
		tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL,
		    tctl & ~E1000_TCTL_EN);

		/* Reset FIFO pointers */
		E1000_WRITE_REG(&adapter->hw, E1000_TDFT,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFH,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFTS,
		    adapter->tx_head_addr);
		E1000_WRITE_REG(&adapter->hw, E1000_TDFHS,
		    adapter->tx_head_addr);

		/* Re-enable TX unit */
		E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);
		E1000_WRITE_FLUSH(&adapter->hw);

		/* The software FIFO shadow restarts at the base as well. */
		adapter->tx_fifo_head = 0;
		adapter->tx_fifo_reset_cnt++;

		return (TRUE);
	}
	else {
		return (FALSE);
	}
}
1986
1987static void
1988lem_set_promisc(struct adapter *adapter)
1989{
1990	if_t ifp = adapter->ifp;
1991	u32		reg_rctl;
1992
1993	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
1994
1995	if (if_getflags(ifp) & IFF_PROMISC) {
1996		reg_rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
1997		/* Turn this on if you want to see bad packets */
1998		if (lem_debug_sbp)
1999			reg_rctl |= E1000_RCTL_SBP;
2000		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2001	} else if (if_getflags(ifp) & IFF_ALLMULTI) {
2002		reg_rctl |= E1000_RCTL_MPE;
2003		reg_rctl &= ~E1000_RCTL_UPE;
2004		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2005	}
2006}
2007
2008static void
2009lem_disable_promisc(struct adapter *adapter)
2010{
2011	if_t ifp = adapter->ifp;
2012	u32		reg_rctl;
2013	int		mcnt = 0;
2014
2015	reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
2016	reg_rctl &=  (~E1000_RCTL_UPE);
2017	if (if_getflags(ifp) & IFF_ALLMULTI)
2018		mcnt = MAX_NUM_MULTICAST_ADDRESSES;
2019	else
2020		mcnt = if_multiaddr_count(ifp, MAX_NUM_MULTICAST_ADDRESSES);
2021
2022	/* Don't disable if in MAX groups */
2023	if (mcnt < MAX_NUM_MULTICAST_ADDRESSES)
2024		reg_rctl &=  (~E1000_RCTL_MPE);
2025	reg_rctl &=  (~E1000_RCTL_SBP);
2026	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
2027}
2028
2029
2030/*********************************************************************
2031 *  Multicast Update
2032 *
2033 *  This routine is called whenever multicast address list is updated.
2034 *
2035 **********************************************************************/
2036
static void
lem_set_multi(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u32 reg_rctl = 0;
	u8  *mta; /* Multicast array memory */
	int mcnt = 0;

	IOCTL_DEBUGOUT("lem_set_multi: begin");

	mta = adapter->mta;
	bzero(mta, sizeof(u8) * ETH_ADDR_LEN * MAX_NUM_MULTICAST_ADDRESSES);

	/*
	 * 82542 rev 2 workaround: the receiver must be held in reset (and
	 * MWI disabled) while the multicast table array is rewritten.
	 */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_clear_mwi(&adapter->hw);
		reg_rctl |= E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
	}

	/* Collect the current multicast addresses into mta. */
	if_multiaddr_array(ifp, mta, &mcnt, MAX_NUM_MULTICAST_ADDRESSES);

	/*
	 * Too many groups for the hardware filter: fall back to multicast
	 * promiscuous mode instead of programming the table.
	 */
	if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
	} else
		e1000_update_mc_addr_list(&adapter->hw, mta, mcnt);

	/* 82542 rev 2: take the receiver back out of reset, restore MWI. */
	if (adapter->hw.mac.type == e1000_82542 &&
	    adapter->hw.revision_id == E1000_REVISION_2) {
		reg_rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		reg_rctl &= ~E1000_RCTL_RST;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, reg_rctl);
		msec_delay(5);
		if (adapter->hw.bus.pci_cmd_word & CMD_MEM_WRT_INVALIDATE)
			e1000_pci_set_mwi(&adapter->hw);
	}
}
2079
2080
2081/*********************************************************************
2082 *  Timer routine
2083 *
2084 *  This routine checks for link status and updates statistics.
2085 *
2086 **********************************************************************/
2087
static void
lem_local_timer(void *arg)
{
	struct adapter	*adapter = arg;

	EM_CORE_LOCK_ASSERT(adapter);

	lem_update_link_status(adapter);
	lem_update_stats_counters(adapter);

	lem_smartspeed(adapter);

#ifdef NIC_PARAVIRT
	/* recover space if needed */
	if (adapter->csb && adapter->csb->guest_csb_on &&
	    (adapter->watchdog_check == TRUE) &&
	    (ticks - adapter->watchdog_time > EM_WATCHDOG) &&
	    (adapter->num_tx_desc_avail != adapter->num_tx_desc) ) {
		lem_txeof(adapter);
		/*
		 * lem_txeof() normally (except when space in the queue
		 * runs low XXX) clears watchdog_check so that we do not
		 * falsely declare a hang below.
		 */
	}
#endif /* NIC_PARAVIRT */
	/*
	 * We check the watchdog: the time since
	 * the last TX descriptor was cleaned.
	 * This implies a functional TX engine.
	 */
	if ((adapter->watchdog_check == TRUE) &&
	    (ticks - adapter->watchdog_time > EM_WATCHDOG))
		goto hung;

	/* Rearm ourselves to run again in one second. */
	callout_reset(&adapter->timer, hz, lem_local_timer, adapter);
	return;
hung:
	/* TX has stalled: mark the interface down and reinitialize. */
	device_printf(adapter->dev, "Watchdog timeout -- resetting\n");
	if_setdrvflagbits(adapter->ifp, 0, IFF_DRV_RUNNING);
	adapter->watchdog_events++;
	lem_init_locked(adapter);
}
2131
/*
 * Determine the current link state (per media type) and, on a transition,
 * update cached speed/duplex, the interface baudrate, and notify the stack.
 */
static void
lem_update_link_status(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	if_t ifp = adapter->ifp;
	device_t dev = adapter->dev;
	u32 link_check = 0;

	/* Get the cached link value or read phy for real */
	switch (hw->phy.media_type) {
	case e1000_media_type_copper:
		if (hw->mac.get_link_status) {
			/* Do the work to read phy */
			e1000_check_for_link(hw);
			link_check = !hw->mac.get_link_status;
			if (link_check) /* ESB2 fix */
				e1000_cfg_on_link_up(hw);
		} else
			link_check = TRUE;
		break;
	case e1000_media_type_fiber:
		e1000_check_for_link(hw);
		link_check = (E1000_READ_REG(hw, E1000_STATUS) &
                                 E1000_STATUS_LU);
		break;
	case e1000_media_type_internal_serdes:
		e1000_check_for_link(hw);
		link_check = adapter->hw.mac.serdes_has_link;
		break;
	/* Unknown media: leave link_check at 0 (down). */
	default:
	case e1000_media_type_unknown:
		break;
	}

	/* Now check for a transition */
	if (link_check && (adapter->link_active == 0)) {
		/* Link came up: cache speed/duplex and tell the stack. */
		e1000_get_speed_and_duplex(hw, &adapter->link_speed,
		    &adapter->link_duplex);
		if (bootverbose)
			device_printf(dev, "Link is up %d Mbps %s\n",
			    adapter->link_speed,
			    ((adapter->link_duplex == FULL_DUPLEX) ?
			    "Full Duplex" : "Half Duplex"));
		adapter->link_active = 1;
		adapter->smartspeed = 0;
		if_setbaudrate(ifp, adapter->link_speed * 1000000);
		if_link_state_change(ifp, LINK_STATE_UP);
	} else if (!link_check && (adapter->link_active == 1)) {
		/* Link went down: clear cached state and tell the stack. */
		if_setbaudrate(ifp, 0);
		adapter->link_speed = 0;
		adapter->link_duplex = 0;
		if (bootverbose)
			device_printf(dev, "Link is Down\n");
		adapter->link_active = 0;
		/* Link down, disable watchdog */
		adapter->watchdog_check = FALSE;
		if_link_state_change(ifp, LINK_STATE_DOWN);
	}
}
2191
2192/*********************************************************************
2193 *
2194 *  This routine disables all traffic on the adapter by issuing a
2195 *  global reset on the MAC and deallocates TX/RX buffers.
2196 *
2197 *  This routine should always be called with BOTH the CORE
2198 *  and TX locks.
2199 **********************************************************************/
2200
static void
lem_stop(void *arg)
{
	struct adapter	*adapter = arg;
	if_t ifp = adapter->ifp;

	EM_CORE_LOCK_ASSERT(adapter);
	EM_TX_LOCK_ASSERT(adapter);

	INIT_DEBUGOUT("lem_stop: begin");

	/* Quiesce interrupts and periodic callouts before resetting. */
	lem_disable_intr(adapter);
	callout_stop(&adapter->timer);
	callout_stop(&adapter->tx_fifo_timer);

	/* Tell the stack that the interface is no longer active */
	if_setdrvflagbits(ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));

	e1000_reset_hw(&adapter->hw);
	/* Clear Wake Up Control on parts that have it (82544 and later). */
	if (adapter->hw.mac.type >= e1000_82544)
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, 0);

	e1000_led_off(&adapter->hw);
	e1000_cleanup_led(&adapter->hw);
}
2226
2227
2228/*********************************************************************
2229 *
2230 *  Determine hardware revision.
2231 *
2232 **********************************************************************/
static void
lem_identify_hardware(struct adapter *adapter)
{
	device_t dev = adapter->dev;

	/* Make sure our PCI config space has the necessary stuff set */
	pci_enable_busmaster(dev);
	adapter->hw.bus.pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);

	/* Save off the information about this board */
	adapter->hw.vendor_id = pci_get_vendor(dev);
	adapter->hw.device_id = pci_get_device(dev);
	adapter->hw.revision_id = pci_read_config(dev, PCIR_REVID, 1);
	adapter->hw.subsystem_vendor_id =
	    pci_read_config(dev, PCIR_SUBVEND_0, 2);
	adapter->hw.subsystem_device_id =
	    pci_read_config(dev, PCIR_SUBDEV_0, 2);

	/*
	 * Do Shared Code Init and Setup: derive hw.mac.type from the IDs
	 * saved above. On failure we only log; attach continues regardless.
	 */
	if (e1000_set_mac_type(&adapter->hw)) {
		device_printf(dev, "Setup init failure\n");
		return;
	}
}
2257
/*
 * Map the device's register BAR (and, for MACs newer than 82543, locate
 * and map its IO BAR). Returns 0 on success or ENXIO if a required
 * resource cannot be allocated; caller releases via lem_free_pci_resources().
 */
static int
lem_allocate_pci_resources(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	int		val, rid, error = E1000_SUCCESS;

	rid = PCIR_BAR(0);
	adapter->memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
	    &rid, RF_ACTIVE);
	if (adapter->memory == NULL) {
		device_printf(dev, "Unable to allocate bus resource: memory\n");
		return (ENXIO);
	}
	adapter->osdep.mem_bus_space_tag =
	    rman_get_bustag(adapter->memory);
	adapter->osdep.mem_bus_space_handle =
	    rman_get_bushandle(adapter->memory);
	adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle;

	/* Only older adapters use IO mapping */
	if (adapter->hw.mac.type > e1000_82543) {
		/* Figure out where our IO BAR is */
		for (rid = PCIR_BAR(0); rid < PCIR_CIS;) {
			val = pci_read_config(dev, rid, 4);
			if (EM_BAR_TYPE(val) == EM_BAR_TYPE_IO) {
				adapter->io_rid = rid;
				break;
			}
			rid += 4;
			/* check for 64bit BAR */
			if (EM_BAR_MEM_TYPE(val) == EM_BAR_MEM_TYPE_64BIT)
				rid += 4;
		}
		/* Scanned past the BAR region without finding an IO BAR. */
		if (rid >= PCIR_CIS) {
			device_printf(dev, "Unable to locate IO BAR\n");
			return (ENXIO);
		}
		adapter->ioport = bus_alloc_resource_any(dev,
		    SYS_RES_IOPORT, &adapter->io_rid, RF_ACTIVE);
		if (adapter->ioport == NULL) {
			device_printf(dev, "Unable to allocate bus resource: "
			    "ioport\n");
			return (ENXIO);
		}
		adapter->hw.io_base = 0;
		adapter->osdep.io_bus_space_tag =
		    rman_get_bustag(adapter->ioport);
		adapter->osdep.io_bus_space_handle =
		    rman_get_bushandle(adapter->ioport);
	}

	/* Give the shared code a back-pointer to the OS-specific state. */
	adapter->hw.back = &adapter->osdep;

	return (error);
}
2313
2314/*********************************************************************
2315 *
2316 *  Setup the Legacy or MSI Interrupt handler
2317 *
2318 **********************************************************************/
static int
lem_allocate_irq(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	int error, rid = 0;

	/* Manually turn off all interrupts */
	E1000_WRITE_REG(&adapter->hw, E1000_IMC, 0xffffffff);

	/* We allocate a single interrupt resource */
	adapter->res[0] = bus_alloc_resource_any(dev,
	    SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE);
	if (adapter->res[0] == NULL) {
		device_printf(dev, "Unable to allocate bus resource: "
		    "interrupt\n");
		return (ENXIO);
	}

	/*
	 * Do Legacy setup? Registers an ithread-style handler instead of
	 * the fast handler + taskqueue path below.
	 * NOTE(review): error paths here leave res[0] allocated; presumably
	 * the caller releases it via lem_free_pci_resources() — confirm.
	 */
	if (lem_use_legacy_irq) {
		if ((error = bus_setup_intr(dev, adapter->res[0],
	    	    INTR_TYPE_NET | INTR_MPSAFE, NULL, lem_intr, adapter,
	    	    &adapter->tag[0])) != 0) {
			device_printf(dev,
			    "Failed to register interrupt handler");
			return (error);
		}
		return (0);
	}

	/*
	 * Use a Fast interrupt and the associated
	 * deferred processing contexts.
	 */
	TASK_INIT(&adapter->rxtx_task, 0, lem_handle_rxtx, adapter);
	TASK_INIT(&adapter->link_task, 0, lem_handle_link, adapter);
	adapter->tq = taskqueue_create_fast("lem_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &adapter->tq);
	taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s taskq",
	    device_get_nameunit(adapter->dev));
	if ((error = bus_setup_intr(dev, adapter->res[0],
	    INTR_TYPE_NET, lem_irq_fast, NULL, adapter,
	    &adapter->tag[0])) != 0) {
		device_printf(dev, "Failed to register fast interrupt "
			    "handler: %d\n", error);
		/* Undo the taskqueue created above before bailing out. */
		taskqueue_free(adapter->tq);
		adapter->tq = NULL;
		return (error);
	}

	return (0);
}
2371
2372
/*
 * Release everything acquired by lem_allocate_pci_resources() and
 * lem_allocate_irq(): interrupt handler and IRQ first, then the memory
 * and (optional) ioport BARs. Safe to call with partially-allocated state
 * since each resource is NULL-checked.
 */
static void
lem_free_pci_resources(struct adapter *adapter)
{
	device_t dev = adapter->dev;


	/* Tear down the interrupt handler before releasing the IRQ. */
	if (adapter->tag[0] != NULL) {
		bus_teardown_intr(dev, adapter->res[0],
		    adapter->tag[0]);
		adapter->tag[0] = NULL;
	}

	if (adapter->res[0] != NULL) {
		bus_release_resource(dev, SYS_RES_IRQ,
		    0, adapter->res[0]);
	}

	if (adapter->memory != NULL)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    PCIR_BAR(0), adapter->memory);

	if (adapter->ioport != NULL)
		bus_release_resource(dev, SYS_RES_IOPORT,
		    adapter->io_rid, adapter->ioport);
}
2398
2399
2400/*********************************************************************
2401 *
2402 *  Initialize the hardware to a configuration
2403 *  as specified by the adapter structure.
2404 *
2405 **********************************************************************/
2406static int
2407lem_hardware_init(struct adapter *adapter)
2408{
2409	device_t dev = adapter->dev;
2410	u16 	rx_buffer_size;
2411
2412	INIT_DEBUGOUT("lem_hardware_init: begin");
2413
2414	/* Issue a global reset */
2415	e1000_reset_hw(&adapter->hw);
2416
2417	/* When hardware is reset, fifo_head is also reset */
2418	adapter->tx_fifo_head = 0;
2419
2420	/*
2421	 * These parameters control the automatic generation (Tx) and
2422	 * response (Rx) to Ethernet PAUSE frames.
2423	 * - High water mark should allow for at least two frames to be
2424	 *   received after sending an XOFF.
2425	 * - Low water mark works best when it is very near the high water mark.
2426	 *   This allows the receiver to restart by sending XON when it has
2427	 *   drained a bit. Here we use an arbitrary value of 1500 which will
2428	 *   restart after one full frame is pulled from the buffer. There
2429	 *   could be several smaller frames in the buffer and if so they will
2430	 *   not trigger the XON until their total number reduces the buffer
2431	 *   by 1500.
2432	 * - The pause time is fairly large at 1000 x 512ns = 512 usec.
2433	 */
2434	rx_buffer_size = ((E1000_READ_REG(&adapter->hw, E1000_PBA) &
2435	    0xffff) << 10 );
2436
2437	adapter->hw.fc.high_water = rx_buffer_size -
2438	    roundup2(adapter->max_frame_size, 1024);
2439	adapter->hw.fc.low_water = adapter->hw.fc.high_water - 1500;
2440
2441	adapter->hw.fc.pause_time = EM_FC_PAUSE_TIME;
2442	adapter->hw.fc.send_xon = TRUE;
2443
2444        /* Set Flow control, use the tunable location if sane */
2445        if ((lem_fc_setting >= 0) && (lem_fc_setting < 4))
2446                adapter->hw.fc.requested_mode = lem_fc_setting;
2447        else
2448                adapter->hw.fc.requested_mode = e1000_fc_none;
2449
2450	if (e1000_init_hw(&adapter->hw) < 0) {
2451		device_printf(dev, "Hardware Initialization Failed\n");
2452		return (EIO);
2453	}
2454
2455	e1000_check_for_link(&adapter->hw);
2456
2457	return (0);
2458}
2459
2460/*********************************************************************
2461 *
2462 *  Setup networking device structure and register an interface.
2463 *
2464 **********************************************************************/
static int
lem_setup_interface(device_t dev, struct adapter *adapter)
{
	if_t ifp;

	INIT_DEBUGOUT("lem_setup_interface: begin");

	ifp = adapter->ifp = if_gethandle(IFT_ETHER);
	if (ifp == (void *)NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (-1);
	}
	/* Wire up the standard ifnet entry points. */
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setinitfn(ifp,  lem_init);
	if_setsoftc(ifp, adapter);
	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setioctlfn(ifp, lem_ioctl);
	if_setstartfn(ifp, lem_start);
	if_setgetcounterfn(ifp, lem_get_counter);
	if_setsendqlen(ifp, adapter->num_tx_desc - 1);
	if_setsendqready(ifp);

	ether_ifattach(ifp, adapter->hw.mac.addr);

	if_setcapabilities(ifp, 0);

	/* Hardware checksum offload only on 82543 and later. */
	if (adapter->hw.mac.type >= e1000_82543) {
		if_setcapabilitiesbit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0);
		if_setcapenablebit(ifp, IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM, 0);
	}

	/*
	 * Tell the upper layer(s) we support long frames.
	 */
	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);
	if_setcapenablebit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU, 0);

	/*
	** Dont turn this on by default, if vlans are
	** created on another pseudo device (eg. lagg)
	** then vlan events are not passed thru, breaking
	** operation, but with HW FILTER off it works. If
	** using vlans directly on the em driver you can
	** enable this and get full hardware tag filtering.
	*/
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);

#ifdef DEVICE_POLLING
	if_setcapabilitiesbit(ifp, IFCAP_POLLING, 0);
#endif

	/* Enable only WOL MAGIC by default */
	if (adapter->wol) {
		if_setcapabilitiesbit(ifp, IFCAP_WOL, 0);
		if_setcapenablebit(ifp, IFCAP_WOL_MAGIC, 0);
	}

	/*
	 * Specify the media types supported by this adapter and register
	 * callbacks to update media and link information
	 */
	ifmedia_init(&adapter->media, IFM_IMASK,
	    lem_media_change, lem_media_status);
	if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
	    (adapter->hw.phy.media_type == e1000_media_type_internal_serdes)) {
		u_char fiber_type = IFM_1000_SX;	/* default type */

		/* The 82545 fiber part is 1000BASE-LX. */
		if (adapter->hw.mac.type == e1000_82545)
			fiber_type = IFM_1000_LX;
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | fiber_type, 0, NULL);
	} else {
		/* Copper: advertise 10/100 and, where supported, 1000BASE-T. */
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T | IFM_FDX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX,
			    0, NULL);
		ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX | IFM_FDX,
			    0, NULL);
		if (adapter->hw.phy.type != e1000_phy_ife) {
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL);
			ifmedia_add(&adapter->media,
				IFM_ETHER | IFM_1000_T, 0, NULL);
		}
	}
	ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO);
	return (0);
}
2557
2558
2559/*********************************************************************
2560 *
2561 *  Workaround for SmartSpeed on 82541 and 82547 controllers
2562 *
2563 **********************************************************************/
static void
lem_smartspeed(struct adapter *adapter)
{
	u16 phy_tmp;

	/* Only relevant when link is down on an IGP PHY autonegotiating
	 * 1000FULL. */
	if (adapter->link_active || (adapter->hw.phy.type != e1000_phy_igp) ||
	    adapter->hw.mac.autoneg == 0 ||
	    (adapter->hw.phy.autoneg_advertised & ADVERTISE_1000_FULL) == 0)
		return;

	if (adapter->smartspeed == 0) {
		/* If Master/Slave config fault is asserted twice,
		 * we assume back-to-back */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (!(phy_tmp & SR_1000T_MS_CONFIG_FAULT))
			return;
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_STATUS, &phy_tmp);
		if (phy_tmp & SR_1000T_MS_CONFIG_FAULT) {
			e1000_read_phy_reg(&adapter->hw,
			    PHY_1000T_CTRL, &phy_tmp);
			/* Drop manual master/slave and restart autoneg. */
			if(phy_tmp & CR_1000T_MS_ENABLE) {
				phy_tmp &= ~CR_1000T_MS_ENABLE;
				e1000_write_phy_reg(&adapter->hw,
				    PHY_1000T_CTRL, phy_tmp);
				adapter->smartspeed++;
				if(adapter->hw.mac.autoneg &&
				   !e1000_copper_link_autoneg(&adapter->hw) &&
				   !e1000_read_phy_reg(&adapter->hw,
				    PHY_CONTROL, &phy_tmp)) {
					phy_tmp |= (MII_CR_AUTO_NEG_EN |
						    MII_CR_RESTART_AUTO_NEG);
					e1000_write_phy_reg(&adapter->hw,
					    PHY_CONTROL, phy_tmp);
				}
			}
		}
		return;
	} else if(adapter->smartspeed == EM_SMARTSPEED_DOWNSHIFT) {
		/* If still no link, perhaps using 2/3 pair cable */
		e1000_read_phy_reg(&adapter->hw, PHY_1000T_CTRL, &phy_tmp);
		phy_tmp |= CR_1000T_MS_ENABLE;
		e1000_write_phy_reg(&adapter->hw, PHY_1000T_CTRL, phy_tmp);
		if(adapter->hw.mac.autoneg &&
		   !e1000_copper_link_autoneg(&adapter->hw) &&
		   !e1000_read_phy_reg(&adapter->hw, PHY_CONTROL, &phy_tmp)) {
			phy_tmp |= (MII_CR_AUTO_NEG_EN |
				    MII_CR_RESTART_AUTO_NEG);
			e1000_write_phy_reg(&adapter->hw, PHY_CONTROL, phy_tmp);
		}
	}
	/* Restart process after EM_SMARTSPEED_MAX iterations */
	if(adapter->smartspeed++ == EM_SMARTSPEED_MAX)
		adapter->smartspeed = 0;
}
2618
2619
2620/*
2621 * Manage DMA'able memory.
2622 */
2623static void
2624lem_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
2625{
2626	if (error)
2627		return;
2628	*(bus_addr_t *) arg = segs[0].ds_addr;
2629}
2630
2631static int
2632lem_dma_malloc(struct adapter *adapter, bus_size_t size,
2633        struct em_dma_alloc *dma, int mapflags)
2634{
2635	int error;
2636
2637	error = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2638				EM_DBA_ALIGN, 0,	/* alignment, bounds */
2639				BUS_SPACE_MAXADDR,	/* lowaddr */
2640				BUS_SPACE_MAXADDR,	/* highaddr */
2641				NULL, NULL,		/* filter, filterarg */
2642				size,			/* maxsize */
2643				1,			/* nsegments */
2644				size,			/* maxsegsize */
2645				0,			/* flags */
2646				NULL,			/* lockfunc */
2647				NULL,			/* lockarg */
2648				&dma->dma_tag);
2649	if (error) {
2650		device_printf(adapter->dev,
2651		    "%s: bus_dma_tag_create failed: %d\n",
2652		    __func__, error);
2653		goto fail_0;
2654	}
2655
2656	error = bus_dmamem_alloc(dma->dma_tag, (void**) &dma->dma_vaddr,
2657	    BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dma->dma_map);
2658	if (error) {
2659		device_printf(adapter->dev,
2660		    "%s: bus_dmamem_alloc(%ju) failed: %d\n",
2661		    __func__, (uintmax_t)size, error);
2662		goto fail_2;
2663	}
2664
2665	dma->dma_paddr = 0;
2666	error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2667	    size, lem_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2668	if (error || dma->dma_paddr == 0) {
2669		device_printf(adapter->dev,
2670		    "%s: bus_dmamap_load failed: %d\n",
2671		    __func__, error);
2672		goto fail_3;
2673	}
2674
2675	return (0);
2676
2677fail_3:
2678	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2679fail_2:
2680	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2681	bus_dma_tag_destroy(dma->dma_tag);
2682fail_0:
2683	dma->dma_tag = NULL;
2684
2685	return (error);
2686}
2687
/*
 * Release a DMA area created by lem_dma_malloc(): sync + unload the map
 * if it was loaded, free the memory if allocated, destroy the tag.
 * Idempotent: a NULL tag means nothing to do, and freed fields are
 * cleared so a second call is harmless.
 */
static void
lem_dma_free(struct adapter *adapter, struct em_dma_alloc *dma)
{
	if (dma->dma_tag == NULL)
		return;
	if (dma->dma_paddr != 0) {
		bus_dmamap_sync(dma->dma_tag, dma->dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(dma->dma_tag, dma->dma_map);
		dma->dma_paddr = 0;
	}
	if (dma->dma_vaddr != NULL) {
		bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
		dma->dma_vaddr = NULL;
	}
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;
}
2706
2707
2708/*********************************************************************
2709 *
2710 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
2711 *  the information needed to transmit a packet on the wire.
2712 *
2713 **********************************************************************/
static int
lem_allocate_transmit_structures(struct adapter *adapter)
{
	device_t dev = adapter->dev;
	struct em_buffer *tx_buffer;
	int error;

	/*
	 * Create DMA tags for tx descriptors
	 */
	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
				1, 0,			/* alignment, bounds */
				BUS_SPACE_MAXADDR,	/* lowaddr */
				BUS_SPACE_MAXADDR,	/* highaddr */
				NULL, NULL,		/* filter, filterarg */
				MCLBYTES * EM_MAX_SCATTER,	/* maxsize */
				EM_MAX_SCATTER,		/* nsegments */
				MCLBYTES,		/* maxsegsize */
				0,			/* flags */
				NULL,			/* lockfunc */
				NULL,			/* lockarg */
				&adapter->txtag)) != 0) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	/* One em_buffer per descriptor, zeroed. */
	adapter->tx_buffer_area = malloc(sizeof(struct em_buffer) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (adapter->tx_buffer_area == NULL) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	for (int i = 0; i < adapter->num_tx_desc; i++) {
		tx_buffer = &adapter->tx_buffer_area[i];
		error = bus_dmamap_create(adapter->txtag, 0, &tx_buffer->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
		tx_buffer->next_eop = -1;
	}

	return (0);
fail:
	/* Unwind whatever was allocated; handles partial state safely. */
	lem_free_transmit_structures(adapter);
	return (error);
}
2764
2765/*********************************************************************
2766 *
2767 *  (Re)Initialize transmit structures.
2768 *
2769 **********************************************************************/
2770static void
2771lem_setup_transmit_structures(struct adapter *adapter)
2772{
2773	struct em_buffer *tx_buffer;
2774#ifdef DEV_NETMAP
2775	/* we are already locked */
2776	struct netmap_adapter *na = netmap_getna(adapter->ifp);
2777	struct netmap_slot *slot = netmap_reset(na, NR_TX, 0, 0);
2778#endif /* DEV_NETMAP */
2779
2780	/* Clear the old ring contents */
2781	bzero(adapter->tx_desc_base,
2782	    (sizeof(struct e1000_tx_desc)) * adapter->num_tx_desc);
2783
2784	/* Free any existing TX buffers */
2785	for (int i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
2786		tx_buffer = &adapter->tx_buffer_area[i];
2787		bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2788		    BUS_DMASYNC_POSTWRITE);
2789		bus_dmamap_unload(adapter->txtag, tx_buffer->map);
2790		m_freem(tx_buffer->m_head);
2791		tx_buffer->m_head = NULL;
2792#ifdef DEV_NETMAP
2793		if (slot) {
2794			/* the i-th NIC entry goes to slot si */
2795			int si = netmap_idx_n2k(&na->tx_rings[0], i);
2796			uint64_t paddr;
2797			void *addr;
2798
2799			addr = PNMB(na, slot + si, &paddr);
2800			adapter->tx_desc_base[i].buffer_addr = htole64(paddr);
2801			/* reload the map for netmap mode */
2802			netmap_load_map(na, adapter->txtag, tx_buffer->map, addr);
2803		}
2804#endif /* DEV_NETMAP */
2805		tx_buffer->next_eop = -1;
2806	}
2807
2808	/* Reset state */
2809	adapter->last_hw_offload = 0;
2810	adapter->next_avail_tx_desc = 0;
2811	adapter->next_tx_to_clean = 0;
2812	adapter->num_tx_desc_avail = adapter->num_tx_desc;
2813
2814	bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
2815	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2816
2817	return;
2818}
2819
2820/*********************************************************************
2821 *
2822 *  Enable transmit unit.
2823 *
2824 **********************************************************************/
static void
lem_initialize_transmit_unit(struct adapter *adapter)
{
	u32	tctl, tipg = 0;
	u64	bus_addr;

	 INIT_DEBUGOUT("lem_initialize_transmit_unit: begin");
	/* Setup the Base and Length of the Tx Descriptor Ring */
	bus_addr = adapter->txdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_TDLEN(0),
	    adapter->num_tx_desc * sizeof(struct e1000_tx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAH(0),
	    (u32)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_TDBAL(0),
	    (u32)bus_addr);
	/* Setup the HW Tx Head and Tail descriptor pointers */
	E1000_WRITE_REG(&adapter->hw, E1000_TDT(0), 0);
	E1000_WRITE_REG(&adapter->hw, E1000_TDH(0), 0);

	HW_DEBUGOUT2("Base = %x, Length = %x\n",
	    E1000_READ_REG(&adapter->hw, E1000_TDBAL(0)),
	    E1000_READ_REG(&adapter->hw, E1000_TDLEN(0)));

	/* Set the default values for the Tx Inter Packet Gap timer */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
		tipg = DEFAULT_82542_TIPG_IPGT;
		tipg |= DEFAULT_82542_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82542_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
		break;
	default:
		/* 82543 and later: IPGT depends on the media type. */
		if ((adapter->hw.phy.media_type == e1000_media_type_fiber) ||
		    (adapter->hw.phy.media_type ==
		    e1000_media_type_internal_serdes))
			tipg = DEFAULT_82543_TIPG_IPGT_FIBER;
		else
			tipg = DEFAULT_82543_TIPG_IPGT_COPPER;
		tipg |= DEFAULT_82543_TIPG_IPGR1 << E1000_TIPG_IPGR1_SHIFT;
		tipg |= DEFAULT_82543_TIPG_IPGR2 << E1000_TIPG_IPGR2_SHIFT;
	}

	/* Program inter-packet gap and Tx interrupt delay registers. */
	E1000_WRITE_REG(&adapter->hw, E1000_TIPG, tipg);
	E1000_WRITE_REG(&adapter->hw, E1000_TIDV, adapter->tx_int_delay.value);
	if(adapter->hw.mac.type >= e1000_82540)
		E1000_WRITE_REG(&adapter->hw, E1000_TADV,
		    adapter->tx_abs_int_delay.value);

	/* Program the Transmit Control Register */
	tctl = E1000_READ_REG(&adapter->hw, E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= (E1000_TCTL_PSP | E1000_TCTL_RTLC | E1000_TCTL_EN |
		   (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT));

	/* This write will effectively turn on the transmit unit. */
	E1000_WRITE_REG(&adapter->hw, E1000_TCTL, tctl);

	/* Setup Transmit Descriptor Base Settings */
	adapter->txd_cmd = E1000_TXD_CMD_IFCS;

	/* Only request descriptor-done interrupt delay when a delay is set. */
	if (adapter->tx_int_delay.value > 0)
		adapter->txd_cmd |= E1000_TXD_CMD_IDE;
}
2887
2888/*********************************************************************
2889 *
2890 *  Free all transmit related data structures.
2891 *
2892 **********************************************************************/
2893static void
2894lem_free_transmit_structures(struct adapter *adapter)
2895{
2896	struct em_buffer *tx_buffer;
2897
2898	INIT_DEBUGOUT("free_transmit_structures: begin");
2899
2900	if (adapter->tx_buffer_area != NULL) {
2901		for (int i = 0; i < adapter->num_tx_desc; i++) {
2902			tx_buffer = &adapter->tx_buffer_area[i];
2903			if (tx_buffer->m_head != NULL) {
2904				bus_dmamap_sync(adapter->txtag, tx_buffer->map,
2905				    BUS_DMASYNC_POSTWRITE);
2906				bus_dmamap_unload(adapter->txtag,
2907				    tx_buffer->map);
2908				m_freem(tx_buffer->m_head);
2909				tx_buffer->m_head = NULL;
2910			} else if (tx_buffer->map != NULL)
2911				bus_dmamap_unload(adapter->txtag,
2912				    tx_buffer->map);
2913			if (tx_buffer->map != NULL) {
2914				bus_dmamap_destroy(adapter->txtag,
2915				    tx_buffer->map);
2916				tx_buffer->map = NULL;
2917			}
2918		}
2919	}
2920	if (adapter->tx_buffer_area != NULL) {
2921		free(adapter->tx_buffer_area, M_DEVBUF);
2922		adapter->tx_buffer_area = NULL;
2923	}
2924	if (adapter->txtag != NULL) {
2925		bus_dma_tag_destroy(adapter->txtag);
2926		adapter->txtag = NULL;
2927	}
2928}
2929
2930/*********************************************************************
2931 *
2932 *  The offload context needs to be set when we transfer the first
2933 *  packet of a particular protocol (TCP/UDP). This routine has been
2934 *  enhanced to deal with inserted VLAN headers, and IPV6 (not complete)
2935 *
2936 *  Added back the old method of keeping the current context type
2937 *  and not setting if unnecessary, as this is reported to be a
2938 *  big performance win.  -jfv
2939 **********************************************************************/
static void
lem_transmit_checksum_setup(struct adapter *adapter, struct mbuf *mp,
    u32 *txd_upper, u32 *txd_lower)
{
	struct e1000_context_desc *TXD = NULL;
	struct em_buffer *tx_buffer;
	struct ether_vlan_header *eh;
	struct ip *ip = NULL;
	struct ip6_hdr *ip6;
	int curr_txd, ehdrlen;
	u32 cmd, hdr_len, ip_hlen;
	u16 etype;
	u8 ipproto;


	cmd = hdr_len = ipproto = 0;
	*txd_upper = *txd_lower = 0;
	/* Slot where a context descriptor will be written, if one is needed. */
	curr_txd = adapter->next_avail_tx_desc;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/*
	 * We only support TCP/UDP for IPv4 and IPv6 for the moment.
	 * TODO: Support SCTP too when it hits the tree.
	 */
	switch (etype) {
	case ETHERTYPE_IP:
		/* NOTE: assumes the full IP header is in the first mbuf. */
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;

		/* Setup of IP header checksum. */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->lower_setup.ip_fields.ipcss = ehdrlen;
			TXD->lower_setup.ip_fields.ipcse =
			    htole16(ehdrlen + ip_hlen);
			TXD->lower_setup.ip_fields.ipcso =
			    ehdrlen + offsetof(struct ip, ip_sum);
			cmd |= E1000_TXD_CMD_IP;
			*txd_upper |= E1000_TXD_POPTS_IXSM << 8;
		}

		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip->ip_p;

		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr); /* XXX: No header stacking. */

		/* IPv6 doesn't have a header checksum. */

		hdr_len = ehdrlen + ip_hlen;
		ipproto = ip6->ip6_nxt;
		break;

	default:
		/* Not IPv4/IPv6: no offload context to set up. */
		return;
	}

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/* no need for context if already set */
			if (adapter->last_hw_offload == CSUM_TCP)
				return;
			adapter->last_hw_offload = CSUM_TCP;
			/*
			 * Start offset for payload checksum calculation.
			 * End offset for payload checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct tcphdr, th_sum);
			cmd |= E1000_TXD_CMD_TCP;
		}
		break;
	case IPPROTO_UDP:
	{
		if (mp->m_pkthdr.csum_flags & CSUM_UDP) {
			*txd_lower = E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
			*txd_upper |= E1000_TXD_POPTS_TXSM << 8;
			/* no need for context if already set */
			if (adapter->last_hw_offload == CSUM_UDP)
				return;
			adapter->last_hw_offload = CSUM_UDP;
			/*
			 * Start offset for header checksum calculation.
			 * End offset for header checksum calculation.
			 * Offset of place to put the checksum.
			 */
			TXD = (struct e1000_context_desc *)
			    &adapter->tx_desc_base[curr_txd];
			TXD->upper_setup.tcp_fields.tucss = hdr_len;
			TXD->upper_setup.tcp_fields.tucse = htole16(0);
			TXD->upper_setup.tcp_fields.tucso =
			    hdr_len + offsetof(struct udphdr, uh_sum);
		}
		/* Fall Thru */
	}
	default:
		break;
	}

	/* TXD == NULL means no context descriptor was written: done. */
	if (TXD == NULL)
		return;
	/* Finish the context descriptor and consume the ring slot. */
	TXD->tcp_seg_setup.data = htole32(0);
	TXD->cmd_and_length =
	    htole32(adapter->txd_cmd | E1000_TXD_CMD_DEXT | cmd);
	tx_buffer = &adapter->tx_buffer_area[curr_txd];
	tx_buffer->m_head = NULL;
	tx_buffer->next_eop = -1;

	if (++curr_txd == adapter->num_tx_desc)
		curr_txd = 0;

	adapter->num_tx_desc_avail--;
	adapter->next_avail_tx_desc = curr_txd;
}
3083
3084
3085/**********************************************************************
3086 *
3087 *  Examine each tx_buffer in the used queue. If the hardware is done
3088 *  processing the packet then free associated resources. The
3089 *  tx_buffer is put back on the free queue.
3090 *
3091 **********************************************************************/
static void
lem_txeof(struct adapter *adapter)
{
        int first, last, done, num_avail;
        struct em_buffer *tx_buffer;
        struct e1000_tx_desc   *tx_desc, *eop_desc;
	if_t ifp = adapter->ifp;

	EM_TX_LOCK_ASSERT(adapter);

#ifdef DEV_NETMAP
	/* In netmap mode TX completion is handled by the netmap code. */
	if (netmap_tx_irq(ifp, 0))
		return;
#endif /* DEV_NETMAP */
	/* Ring is already fully reclaimed: nothing to do. */
        if (adapter->num_tx_desc_avail == adapter->num_tx_desc)
                return;

        num_avail = adapter->num_tx_desc_avail;
        first = adapter->next_tx_to_clean;
        tx_desc = &adapter->tx_desc_base[first];
        tx_buffer = &adapter->tx_buffer_area[first];
	last = tx_buffer->next_eop;
        eop_desc = &adapter->tx_desc_base[last];

	/*
	 * What this does is get the index of the
	 * first descriptor AFTER the EOP of the
	 * first packet, that way we can do the
	 * simple comparison on the inner while loop.
	 */
	if (++last == adapter->num_tx_desc)
 		last = 0;
	done = last;

        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
            BUS_DMASYNC_POSTREAD);

	/* Outer loop: one iteration per completed packet (DD set on its EOP). */
        while (eop_desc->upper.fields.status & E1000_TXD_STAT_DD) {
		/* We clean the range of the packet */
		while (first != done) {
                	tx_desc->upper.data = 0;
                	tx_desc->lower.data = 0;
                	tx_desc->buffer_addr = 0;
                	++num_avail;

			if (tx_buffer->m_head) {
				if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
				bus_dmamap_sync(adapter->txtag,
				    tx_buffer->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(adapter->txtag,
				    tx_buffer->map);

                        	m_freem(tx_buffer->m_head);
                        	tx_buffer->m_head = NULL;
                	}
			tx_buffer->next_eop = -1;
			/* Progress was made; refresh the watchdog timestamp. */
			adapter->watchdog_time = ticks;

	                if (++first == adapter->num_tx_desc)
				first = 0;

	                tx_buffer = &adapter->tx_buffer_area[first];
			tx_desc = &adapter->tx_desc_base[first];
		}
		/* See if we can continue to the next packet */
		last = tx_buffer->next_eop;
		if (last != -1) {
        		eop_desc = &adapter->tx_desc_base[last];
			/* Get new done point */
			if (++last == adapter->num_tx_desc) last = 0;
			done = last;
		} else
			break;
        }
        bus_dmamap_sync(adapter->txdma.dma_tag, adapter->txdma.dma_map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        adapter->next_tx_to_clean = first;
        adapter->num_tx_desc_avail = num_avail;

#ifdef NIC_SEND_COMBINING
	if ((adapter->shadow_tdt & MIT_PENDING_TDT) == MIT_PENDING_TDT) {
		/* a tdt write is pending, do it */
		E1000_WRITE_REG(&adapter->hw, E1000_TDT(0),
			0xffff & adapter->shadow_tdt);
		adapter->shadow_tdt = MIT_PENDING_INT;
	} else {
		adapter->shadow_tdt = 0; // disable
	}
#endif /* NIC_SEND_COMBINING */
        /*
         * If we have enough room, clear IFF_DRV_OACTIVE to
         * tell the stack that it is OK to send packets.
         * If there are no pending descriptors, clear the watchdog.
         */
        if (adapter->num_tx_desc_avail > EM_TX_CLEANUP_THRESHOLD) {
                if_setdrvflagbits(ifp, 0, IFF_DRV_OACTIVE);
#ifdef NIC_PARAVIRT
		if (adapter->csb) { // XXX also csb_on ?
			adapter->csb->guest_need_txkick = 2; /* acked */
			// XXX memory barrier
		}
#endif /* NIC_PARAVIRT */
                if (adapter->num_tx_desc_avail == adapter->num_tx_desc) {
			adapter->watchdog_check = FALSE;
			return;
		}
        }
}
3202
3203/*********************************************************************
3204 *
3205 *  When Link is lost sometimes there is work still in the TX ring
3206 *  which may result in a watchdog, rather than allow that we do an
3207 *  attempted cleanup and then reinit here. Note that this has been
 *  seen mostly with fiber adapters.
3209 *
3210 **********************************************************************/
3211static void
3212lem_tx_purge(struct adapter *adapter)
3213{
3214	if ((!adapter->link_active) && (adapter->watchdog_check)) {
3215		EM_TX_LOCK(adapter);
3216		lem_txeof(adapter);
3217		EM_TX_UNLOCK(adapter);
3218		if (adapter->watchdog_check) /* Still outstanding? */
3219			lem_init_locked(adapter);
3220	}
3221}
3222
3223/*********************************************************************
3224 *
3225 *  Get a buffer from system mbuf buffer pool.
3226 *
3227 **********************************************************************/
3228static int
3229lem_get_buf(struct adapter *adapter, int i)
3230{
3231	struct mbuf		*m;
3232	bus_dma_segment_t	segs[1];
3233	bus_dmamap_t		map;
3234	struct em_buffer	*rx_buffer;
3235	int			error, nsegs;
3236
3237	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
3238	if (m == NULL) {
3239		adapter->mbuf_cluster_failed++;
3240		return (ENOBUFS);
3241	}
3242	m->m_len = m->m_pkthdr.len = MCLBYTES;
3243
3244	if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
3245		m_adj(m, ETHER_ALIGN);
3246
3247	/*
3248	 * Using memory from the mbuf cluster pool, invoke the
3249	 * bus_dma machinery to arrange the memory mapping.
3250	 */
3251	error = bus_dmamap_load_mbuf_sg(adapter->rxtag,
3252	    adapter->rx_sparemap, m, segs, &nsegs, BUS_DMA_NOWAIT);
3253	if (error != 0) {
3254		m_free(m);
3255		return (error);
3256	}
3257
3258	/* If nsegs is wrong then the stack is corrupt. */
3259	KASSERT(nsegs == 1, ("Too many segments returned!"));
3260
3261	rx_buffer = &adapter->rx_buffer_area[i];
3262	if (rx_buffer->m_head != NULL)
3263		bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3264
3265	map = rx_buffer->map;
3266	rx_buffer->map = adapter->rx_sparemap;
3267	adapter->rx_sparemap = map;
3268	bus_dmamap_sync(adapter->rxtag, rx_buffer->map, BUS_DMASYNC_PREREAD);
3269	rx_buffer->m_head = m;
3270
3271	adapter->rx_desc_base[i].buffer_addr = htole64(segs[0].ds_addr);
3272	return (0);
3273}
3274
3275/*********************************************************************
3276 *
3277 *  Allocate memory for rx_buffer structures. Since we use one
3278 *  rx_buffer per received packet, the maximum number of rx_buffer's
3279 *  that we'll need is equal to the number of receive descriptors
3280 *  that we've allocated.
3281 *
3282 **********************************************************************/
3283static int
3284lem_allocate_receive_structures(struct adapter *adapter)
3285{
3286	device_t dev = adapter->dev;
3287	struct em_buffer *rx_buffer;
3288	int i, error;
3289
3290	adapter->rx_buffer_area = malloc(sizeof(struct em_buffer) *
3291	    adapter->num_rx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
3292	if (adapter->rx_buffer_area == NULL) {
3293		device_printf(dev, "Unable to allocate rx_buffer memory\n");
3294		return (ENOMEM);
3295	}
3296
3297	error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
3298				1, 0,			/* alignment, bounds */
3299				BUS_SPACE_MAXADDR,	/* lowaddr */
3300				BUS_SPACE_MAXADDR,	/* highaddr */
3301				NULL, NULL,		/* filter, filterarg */
3302				MCLBYTES,		/* maxsize */
3303				1,			/* nsegments */
3304				MCLBYTES,		/* maxsegsize */
3305				0,			/* flags */
3306				NULL,			/* lockfunc */
3307				NULL,			/* lockarg */
3308				&adapter->rxtag);
3309	if (error) {
3310		device_printf(dev, "%s: bus_dma_tag_create failed %d\n",
3311		    __func__, error);
3312		goto fail;
3313	}
3314
3315	/* Create the spare map (used by getbuf) */
3316	error = bus_dmamap_create(adapter->rxtag, 0, &adapter->rx_sparemap);
3317	if (error) {
3318		device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3319		    __func__, error);
3320		goto fail;
3321	}
3322
3323	rx_buffer = adapter->rx_buffer_area;
3324	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3325		error = bus_dmamap_create(adapter->rxtag, 0, &rx_buffer->map);
3326		if (error) {
3327			device_printf(dev, "%s: bus_dmamap_create failed: %d\n",
3328			    __func__, error);
3329			goto fail;
3330		}
3331	}
3332
3333	return (0);
3334
3335fail:
3336	lem_free_receive_structures(adapter);
3337	return (error);
3338}
3339
3340/*********************************************************************
3341 *
3342 *  (Re)initialize receive structures.
3343 *
3344 **********************************************************************/
3345static int
3346lem_setup_receive_structures(struct adapter *adapter)
3347{
3348	struct em_buffer *rx_buffer;
3349	int i, error;
3350#ifdef DEV_NETMAP
3351	/* we are already under lock */
3352	struct netmap_adapter *na = netmap_getna(adapter->ifp);
3353	struct netmap_slot *slot = netmap_reset(na, NR_RX, 0, 0);
3354#endif
3355
3356	/* Reset descriptor ring */
3357	bzero(adapter->rx_desc_base,
3358	    (sizeof(struct e1000_rx_desc)) * adapter->num_rx_desc);
3359
3360	/* Free current RX buffers. */
3361	rx_buffer = adapter->rx_buffer_area;
3362	for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3363		if (rx_buffer->m_head != NULL) {
3364			bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3365			    BUS_DMASYNC_POSTREAD);
3366			bus_dmamap_unload(adapter->rxtag, rx_buffer->map);
3367			m_freem(rx_buffer->m_head);
3368			rx_buffer->m_head = NULL;
3369		}
3370        }
3371
3372	/* Allocate new ones. */
3373	for (i = 0; i < adapter->num_rx_desc; i++) {
3374#ifdef DEV_NETMAP
3375		if (slot) {
3376			/* the i-th NIC entry goes to slot si */
3377			int si = netmap_idx_n2k(&na->rx_rings[0], i);
3378			uint64_t paddr;
3379			void *addr;
3380
3381			addr = PNMB(na, slot + si, &paddr);
3382			netmap_load_map(na, adapter->rxtag, rx_buffer->map, addr);
3383			/* Update descriptor */
3384			adapter->rx_desc_base[i].buffer_addr = htole64(paddr);
3385			continue;
3386		}
3387#endif /* DEV_NETMAP */
3388		error = lem_get_buf(adapter, i);
3389		if (error)
3390                        return (error);
3391	}
3392
3393	/* Setup our descriptor pointers */
3394	adapter->next_rx_desc_to_check = 0;
3395	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
3396	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3397
3398	return (0);
3399}
3400
3401/*********************************************************************
3402 *
3403 *  Enable receive unit.
3404 *
3405 **********************************************************************/
3406
static void
lem_initialize_receive_unit(struct adapter *adapter)
{
	if_t ifp = adapter->ifp;
	u64	bus_addr;
	u32	rctl, rxcsum;

	INIT_DEBUGOUT("lem_initialize_receive_unit: begin");

	/*
	 * Make sure receives are disabled while setting
	 * up the descriptor ring
	 */
	rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl & ~E1000_RCTL_EN);

	/* RADV/ITR registers exist only on 82540 and later parts. */
	if (adapter->hw.mac.type >= e1000_82540) {
		E1000_WRITE_REG(&adapter->hw, E1000_RADV,
		    adapter->rx_abs_int_delay.value);
		/*
		 * Set the interrupt throttling rate. Value is calculated
		 * as DEFAULT_ITR = 1/(MAX_INTS_PER_SEC * 256ns)
		 */
		E1000_WRITE_REG(&adapter->hw, E1000_ITR, DEFAULT_ITR);
	}

	/* Setup the Base and Length of the Rx Descriptor Ring */
	bus_addr = adapter->rxdma.dma_paddr;
	E1000_WRITE_REG(&adapter->hw, E1000_RDLEN(0),
	    adapter->num_rx_desc * sizeof(struct e1000_rx_desc));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAH(0),
	    (u32)(bus_addr >> 32));
	E1000_WRITE_REG(&adapter->hw, E1000_RDBAL(0),
	    (u32)bus_addr);

	/* Setup the Receive Control Register */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_LBM_NO |
		   E1000_RCTL_RDMTS_HALF |
		   (adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/* Make sure VLAN Filters are off */
	rctl &= ~E1000_RCTL_VFE;

	/* Store-bad-packets tracks the 82543 TBI workaround state. */
	if (e1000_tbi_sbp_enabled_82543(&adapter->hw))
		rctl |= E1000_RCTL_SBP;
	else
		rctl &= ~E1000_RCTL_SBP;

	/* Buffer sizes > 2048 need the buffer-size extension (BSEX) bit. */
	switch (adapter->rx_buffer_len) {
	default:
	case 2048:
		rctl |= E1000_RCTL_SZ_2048;
		break;
	case 4096:
		rctl |= E1000_RCTL_SZ_4096 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 8192:
		rctl |= E1000_RCTL_SZ_8192 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	case 16384:
		rctl |= E1000_RCTL_SZ_16384 |
		    E1000_RCTL_BSEX | E1000_RCTL_LPE;
		break;
	}

	/* Long-packet enable follows the configured MTU. */
	if (if_getmtu(ifp) > ETHERMTU)
		rctl |= E1000_RCTL_LPE;
	else
		rctl &= ~E1000_RCTL_LPE;

	/* Enable 82543 Receive Checksum Offload for TCP and UDP */
	if ((adapter->hw.mac.type >= e1000_82543) &&
	    (if_getcapenable(ifp) & IFCAP_RXCSUM)) {
		rxcsum = E1000_READ_REG(&adapter->hw, E1000_RXCSUM);
		rxcsum |= (E1000_RXCSUM_IPOFL | E1000_RXCSUM_TUOFL);
		E1000_WRITE_REG(&adapter->hw, E1000_RXCSUM, rxcsum);
	}

	/* Enable Receives */
	E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);

	/*
	 * Setup the HW Rx Head and
	 * Tail Descriptor Pointers
	 */
	E1000_WRITE_REG(&adapter->hw, E1000_RDH(0), 0);
	/* NB: rctl is reused below as the RDT (tail) value, not a control word. */
	rctl = adapter->num_rx_desc - 1; /* default RDT value */
#ifdef DEV_NETMAP
	/* preserve buffers already made available to clients */
	if (if_getcapenable(ifp) & IFCAP_NETMAP) {
		struct netmap_adapter *na = netmap_getna(adapter->ifp);
		rctl -= nm_kr_rxspace(&na->rx_rings[0]);
	}
#endif /* DEV_NETMAP */
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), rctl);

	return;
}
3508
3509/*********************************************************************
3510 *
3511 *  Free receive related data structures.
3512 *
3513 **********************************************************************/
3514static void
3515lem_free_receive_structures(struct adapter *adapter)
3516{
3517	struct em_buffer *rx_buffer;
3518	int i;
3519
3520	INIT_DEBUGOUT("free_receive_structures: begin");
3521
3522	if (adapter->rx_sparemap) {
3523		bus_dmamap_destroy(adapter->rxtag, adapter->rx_sparemap);
3524		adapter->rx_sparemap = NULL;
3525	}
3526
3527	/* Cleanup any existing buffers */
3528	if (adapter->rx_buffer_area != NULL) {
3529		rx_buffer = adapter->rx_buffer_area;
3530		for (i = 0; i < adapter->num_rx_desc; i++, rx_buffer++) {
3531			if (rx_buffer->m_head != NULL) {
3532				bus_dmamap_sync(adapter->rxtag, rx_buffer->map,
3533				    BUS_DMASYNC_POSTREAD);
3534				bus_dmamap_unload(adapter->rxtag,
3535				    rx_buffer->map);
3536				m_freem(rx_buffer->m_head);
3537				rx_buffer->m_head = NULL;
3538			} else if (rx_buffer->map != NULL)
3539				bus_dmamap_unload(adapter->rxtag,
3540				    rx_buffer->map);
3541			if (rx_buffer->map != NULL) {
3542				bus_dmamap_destroy(adapter->rxtag,
3543				    rx_buffer->map);
3544				rx_buffer->map = NULL;
3545			}
3546		}
3547	}
3548
3549	if (adapter->rx_buffer_area != NULL) {
3550		free(adapter->rx_buffer_area, M_DEVBUF);
3551		adapter->rx_buffer_area = NULL;
3552	}
3553
3554	if (adapter->rxtag != NULL) {
3555		bus_dma_tag_destroy(adapter->rxtag);
3556		adapter->rxtag = NULL;
3557	}
3558}
3559
3560/*********************************************************************
3561 *
3562 *  This routine executes in interrupt context. It replenishes
3563 *  the mbufs in the descriptor and sends data which has been
3564 *  dma'ed into host memory to upper layer.
3565 *
3566 *  We loop at most count times if count is > 0, or until done if
3567 *  count < 0.
3568 *
3569 *  For polling we also now return the number of cleaned packets
3570 *********************************************************************/
static bool
lem_rxeof(struct adapter *adapter, int count, int *done)
{
	if_t ifp = adapter->ifp;
	struct mbuf	*mp;
	u8		status = 0, accept_frame = 0, eop = 0;
	u16 		len, desc_len, prev_len_adj;
	int		i, rx_sent = 0;
	struct e1000_rx_desc   *current_desc;

#ifdef BATCH_DISPATCH
	struct mbuf *mh = NULL, *mt = NULL;
#endif /* BATCH_DISPATCH */
#ifdef NIC_PARAVIRT
	int retries = 0;
	struct paravirt_csb* csb = adapter->csb;
	int csb_mode = csb && csb->guest_csb_on;

	//ND("clear guest_rxkick at %d", adapter->next_rx_desc_to_check);
	if (csb_mode && csb->guest_need_rxkick)
		csb->guest_need_rxkick = 0;
#endif /* NIC_PARAVIRT */
	EM_RX_LOCK(adapter);

#ifdef BATCH_DISPATCH
    batch_again:
#endif /* BATCH_DISPATCH */
	i = adapter->next_rx_desc_to_check;
	current_desc = &adapter->rx_desc_base[i];
	bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

#ifdef DEV_NETMAP
	/* In netmap mode RX processing is handled by the netmap code. */
	if (netmap_rx_irq(ifp, 0, &rx_sent)) {
		EM_RX_UNLOCK(adapter);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

#if 1 // XXX optimization ?
	/* Fast path: first descriptor not done yet, nothing to process. */
	if (!((current_desc->status) & E1000_RXD_STAT_DD)) {
		if (done != NULL)
			*done = rx_sent;
		EM_RX_UNLOCK(adapter);
		return (FALSE);
	}
#endif /* 1 */

	/* Main loop: one iteration per descriptor until !DD or count spent. */
	while (count != 0 && if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		struct mbuf *m = NULL;

		status = current_desc->status;
		if ((status & E1000_RXD_STAT_DD) == 0) {
#ifdef NIC_PARAVIRT
		    if (csb_mode) {
			/* buffer not ready yet. Retry a few times before giving up */
			if (++retries <= adapter->rx_retries) {
				continue;
			}
			if (csb->guest_need_rxkick == 0) {
				// ND("set guest_rxkick at %d", adapter->next_rx_desc_to_check);
				csb->guest_need_rxkick = 1;
				// XXX memory barrier, status volatile ?
				continue; /* double check */
			}
		    }
		    /* no buffer ready, give up */
#endif /* NIC_PARAVIRT */
			break;
		}
#ifdef NIC_PARAVIRT
		if (csb_mode) {
			if (csb->guest_need_rxkick)
				// ND("clear again guest_rxkick at %d", adapter->next_rx_desc_to_check);
			csb->guest_need_rxkick = 0;
			retries = 0;
		}
#endif /* NIC_PARAVIRT */

		mp = adapter->rx_buffer_area[i].m_head;
		/*
		 * Can't defer bus_dmamap_sync(9) because TBI_ACCEPT
		 * needs to access the last received byte in the mbuf.
		 */
		bus_dmamap_sync(adapter->rxtag, adapter->rx_buffer_area[i].map,
		    BUS_DMASYNC_POSTREAD);

		accept_frame = 1;
		prev_len_adj = 0;
		desc_len = le16toh(current_desc->length);
		/* Only an EOP descriptor counts against the packet budget. */
		if (status & E1000_RXD_STAT_EOP) {
			count--;
			eop = 1;
			if (desc_len < ETHER_CRC_LEN) {
				len = 0;
				prev_len_adj = ETHER_CRC_LEN - desc_len;
			} else
				len = desc_len - ETHER_CRC_LEN;
		} else {
			eop = 0;
			len = desc_len;
		}

		/* 82543 TBI workaround: some error frames may be acceptable. */
		if (current_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
			u8	last_byte;
			u32	pkt_len = desc_len;

			if (adapter->fmp != NULL)
				pkt_len += adapter->fmp->m_pkthdr.len;

			last_byte = *(mtod(mp, caddr_t) + desc_len - 1);
			if (TBI_ACCEPT(&adapter->hw, status,
			    current_desc->errors, pkt_len, last_byte,
			    adapter->min_frame_size, adapter->max_frame_size)) {
				e1000_tbi_adjust_stats_82543(&adapter->hw,
				    &adapter->stats, pkt_len,
				    adapter->hw.mac.addr,
				    adapter->max_frame_size);
				if (len > 0)
					len--;
			} else
				accept_frame = 0;
		}

		if (accept_frame) {
			/* Refill the slot first; on failure the frame is dropped. */
			if (lem_get_buf(adapter, i) != 0) {
				if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
				goto discard;
			}

			/* Assign correct length to the current fragment */
			mp->m_len = len;

			if (adapter->fmp == NULL) {
				mp->m_pkthdr.len = len;
				adapter->fmp = mp; /* Store the first mbuf */
				adapter->lmp = mp;
			} else {
				/* Chain mbuf's together */
				mp->m_flags &= ~M_PKTHDR;
				/*
				 * Adjust length of previous mbuf in chain if
				 * we received less than 4 bytes in the last
				 * descriptor.
				 */
				if (prev_len_adj > 0) {
					adapter->lmp->m_len -= prev_len_adj;
					adapter->fmp->m_pkthdr.len -=
					    prev_len_adj;
				}
				adapter->lmp->m_next = mp;
				adapter->lmp = adapter->lmp->m_next;
				adapter->fmp->m_pkthdr.len += len;
			}

			/* Complete frame: finish it and hand it to the stack. */
			if (eop) {
				if_setrcvif(adapter->fmp, ifp);
				if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
				lem_receive_checksum(adapter, current_desc,
				    adapter->fmp);
#ifndef __NO_STRICT_ALIGNMENT
				if (adapter->max_frame_size >
				    (MCLBYTES - ETHER_ALIGN) &&
				    lem_fixup_rx(adapter) != 0)
					goto skip;
#endif
				if (status & E1000_RXD_STAT_VP) {
					adapter->fmp->m_pkthdr.ether_vtag =
					    le16toh(current_desc->special);
					adapter->fmp->m_flags |= M_VLANTAG;
				}
#ifndef __NO_STRICT_ALIGNMENT
skip:
#endif
				m = adapter->fmp;
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
		} else {
			adapter->dropped_pkts++;
discard:
			/* Reuse loaded DMA map and just update mbuf chain */
			mp = adapter->rx_buffer_area[i].m_head;
			mp->m_len = mp->m_pkthdr.len = MCLBYTES;
			mp->m_data = mp->m_ext.ext_buf;
			mp->m_next = NULL;
			if (adapter->max_frame_size <=
			    (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
			if (adapter->fmp != NULL) {
				m_freem(adapter->fmp);
				adapter->fmp = NULL;
				adapter->lmp = NULL;
			}
			m = NULL;
		}

		/* Zero out the receive descriptors status. */
		current_desc->status = 0;
		bus_dmamap_sync(adapter->rxdma.dma_tag, adapter->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

#ifdef NIC_PARAVIRT
		if (csb_mode) {
			/* the buffer at i has been already replaced by lem_get_buf()
			 * so it is safe to set guest_rdt = i and possibly send a kick.
			 * XXX see if we can optimize it later.
			 */
			csb->guest_rdt = i;
			// XXX memory barrier
			if (i == csb->host_rxkick_at)
				E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
		}
#endif /* NIC_PARAVIRT */
		/* Advance our pointers to the next descriptor. */
		if (++i == adapter->num_rx_desc)
			i = 0;
		/* Call into the stack */
		if (m != NULL) {
#ifdef BATCH_DISPATCH
		    if (adapter->batch_enable) {
			if (mh == NULL)
				mh = mt = m;
			else
				mt->m_nextpkt = m;
			mt = m;
			m->m_nextpkt = NULL;
			rx_sent++;
			current_desc = &adapter->rx_desc_base[i];
			continue;
		    }
#endif /* BATCH_DISPATCH */
			/*
			 * The RX lock is dropped across if_input(); our
			 * ring index is re-read afterwards since it may
			 * have changed while unlocked.
			 */
			adapter->next_rx_desc_to_check = i;
			EM_RX_UNLOCK(adapter);
			if_input(ifp, m);
			EM_RX_LOCK(adapter);
			rx_sent++;
			i = adapter->next_rx_desc_to_check;
		}
		current_desc = &adapter->rx_desc_base[i];
	}
	adapter->next_rx_desc_to_check = i;
#ifdef BATCH_DISPATCH
	if (mh) {
		EM_RX_UNLOCK(adapter);
		while ( (mt = mh) != NULL) {
			mh = mh->m_nextpkt;
			mt->m_nextpkt = NULL;
			if_input(ifp, mt);
		}
		EM_RX_LOCK(adapter);
		i = adapter->next_rx_desc_to_check; /* in case of interrupts */
		if (count > 0)
			goto batch_again;
	}
#endif /* BATCH_DISPATCH */

	/* Advance the E1000's Receive Queue #0  "Tail Pointer". */
	if (--i < 0)
		i = adapter->num_rx_desc - 1;
#ifdef NIC_PARAVIRT
	if (!csb_mode) /* filter out writes */
#endif /* NIC_PARAVIRT */
	E1000_WRITE_REG(&adapter->hw, E1000_RDT(0), i);
	if (done != NULL)
		*done = rx_sent;
	EM_RX_UNLOCK(adapter);
	return ((status & E1000_RXD_STAT_DD) ? TRUE : FALSE);
}
3840
3841#ifndef __NO_STRICT_ALIGNMENT
/*
 * When jumbo frames are enabled we should realign the entire payload on
 * architectures with strict alignment. This is a serious design mistake of
 * 8254x as it nullifies DMA operations. 8254x just allows RX buffer size to be
 * 2048/4096/8192/16384. What we really want is 2048 - ETHER_ALIGN to align its
 * payload. On architectures without strict alignment restrictions 8254x still
 * performs unaligned memory access which would reduce the performance too.
 * To avoid copying over an entire frame to align, we allocate a new mbuf and
 * copy the ethernet header to the new mbuf. The new mbuf is prepended into the
 * existing mbuf chain.
 *
 * Be aware, best performance of the 8254x is achieved only when jumbo frame is
 * not used at all on architectures with strict alignment.
 */
3856static int
3857lem_fixup_rx(struct adapter *adapter)
3858{
3859	struct mbuf *m, *n;
3860	int error;
3861
3862	error = 0;
3863	m = adapter->fmp;
3864	if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) {
3865		bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len);
3866		m->m_data += ETHER_HDR_LEN;
3867	} else {
3868		MGETHDR(n, M_NOWAIT, MT_DATA);
3869		if (n != NULL) {
3870			bcopy(m->m_data, n->m_data, ETHER_HDR_LEN);
3871			m->m_data += ETHER_HDR_LEN;
3872			m->m_len -= ETHER_HDR_LEN;
3873			n->m_len = ETHER_HDR_LEN;
3874			M_MOVE_PKTHDR(n, m);
3875			n->m_next = m;
3876			adapter->fmp = n;
3877		} else {
3878			adapter->dropped_pkts++;
3879			m_freem(adapter->fmp);
3880			adapter->fmp = NULL;
3881			error = ENOMEM;
3882		}
3883	}
3884
3885	return (error);
3886}
3887#endif
3888
3889/*********************************************************************
3890 *
3891 *  Verify that the hardware indicated that the checksum is valid.
3892 *  Inform the stack about the status of checksum so that stack
3893 *  doesn't spend time verifying the checksum.
3894 *
3895 *********************************************************************/
3896static void
3897lem_receive_checksum(struct adapter *adapter,
3898	    struct e1000_rx_desc *rx_desc, struct mbuf *mp)
3899{
3900	/* 82543 or newer only */
3901	if ((adapter->hw.mac.type < e1000_82543) ||
3902	    /* Ignore Checksum bit is set */
3903	    (rx_desc->status & E1000_RXD_STAT_IXSM)) {
3904		mp->m_pkthdr.csum_flags = 0;
3905		return;
3906	}
3907
3908	if (rx_desc->status & E1000_RXD_STAT_IPCS) {
3909		/* Did it pass? */
3910		if (!(rx_desc->errors & E1000_RXD_ERR_IPE)) {
3911			/* IP Checksum Good */
3912			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
3913			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
3914
3915		} else {
3916			mp->m_pkthdr.csum_flags = 0;
3917		}
3918	}
3919
3920	if (rx_desc->status & E1000_RXD_STAT_TCPCS) {
3921		/* Did it pass? */
3922		if (!(rx_desc->errors & E1000_RXD_ERR_TCPE)) {
3923			mp->m_pkthdr.csum_flags |=
3924			(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
3925			mp->m_pkthdr.csum_data = htons(0xffff);
3926		}
3927	}
3928}
3929
3930/*
3931 * This routine is run via an vlan
3932 * config EVENT
3933 */
3934static void
3935lem_register_vlan(void *arg, if_t ifp, u16 vtag)
3936{
3937	struct adapter	*adapter = if_getsoftc(ifp);
3938	u32		index, bit;
3939
3940	if (if_getsoftc(ifp) !=  arg)   /* Not our event */
3941		return;
3942
3943	if ((vtag == 0) || (vtag > 4095))       /* Invalid ID */
3944                return;
3945
3946	EM_CORE_LOCK(adapter);
3947	index = (vtag >> 5) & 0x7F;
3948	bit = vtag & 0x1F;
3949	adapter->shadow_vfta[index] |= (1 << bit);
3950	++adapter->num_vlans;
3951	/* Re-init to load the changes */
3952	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3953		lem_init_locked(adapter);
3954	EM_CORE_UNLOCK(adapter);
3955}
3956
3957/*
3958 * This routine is run via an vlan
3959 * unconfig EVENT
3960 */
3961static void
3962lem_unregister_vlan(void *arg, if_t ifp, u16 vtag)
3963{
3964	struct adapter	*adapter = if_getsoftc(ifp);
3965	u32		index, bit;
3966
3967	if (if_getsoftc(ifp) !=  arg)
3968		return;
3969
3970	if ((vtag == 0) || (vtag > 4095))       /* Invalid */
3971                return;
3972
3973	EM_CORE_LOCK(adapter);
3974	index = (vtag >> 5) & 0x7F;
3975	bit = vtag & 0x1F;
3976	adapter->shadow_vfta[index] &= ~(1 << bit);
3977	--adapter->num_vlans;
3978	/* Re-init to load the changes */
3979	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3980		lem_init_locked(adapter);
3981	EM_CORE_UNLOCK(adapter);
3982}
3983
3984static void
3985lem_setup_vlan_hw_support(struct adapter *adapter)
3986{
3987	struct e1000_hw *hw = &adapter->hw;
3988	u32             reg;
3989
3990	/*
3991	** We get here thru init_locked, meaning
3992	** a soft reset, this has already cleared
3993	** the VFTA and other state, so if there
3994	** have been no vlan's registered do nothing.
3995	*/
3996	if (adapter->num_vlans == 0)
3997                return;
3998
3999	/*
4000	** A soft reset zero's out the VFTA, so
4001	** we need to repopulate it now.
4002	*/
4003	for (int i = 0; i < EM_VFTA_SIZE; i++)
4004                if (adapter->shadow_vfta[i] != 0)
4005			E1000_WRITE_REG_ARRAY(hw, E1000_VFTA,
4006                            i, adapter->shadow_vfta[i]);
4007
4008	reg = E1000_READ_REG(hw, E1000_CTRL);
4009	reg |= E1000_CTRL_VME;
4010	E1000_WRITE_REG(hw, E1000_CTRL, reg);
4011
4012	/* Enable the Filter Table */
4013	reg = E1000_READ_REG(hw, E1000_RCTL);
4014	reg &= ~E1000_RCTL_CFIEN;
4015	reg |= E1000_RCTL_VFE;
4016	E1000_WRITE_REG(hw, E1000_RCTL, reg);
4017}
4018
4019static void
4020lem_enable_intr(struct adapter *adapter)
4021{
4022	struct e1000_hw *hw = &adapter->hw;
4023	u32 ims_mask = IMS_ENABLE_MASK;
4024
4025	E1000_WRITE_REG(hw, E1000_IMS, ims_mask);
4026}
4027
4028static void
4029lem_disable_intr(struct adapter *adapter)
4030{
4031	struct e1000_hw *hw = &adapter->hw;
4032
4033	E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff);
4034}
4035
4036/*
4037 * Bit of a misnomer, what this really means is
4038 * to enable OS management of the system... aka
4039 * to disable special hardware management features
4040 */
4041static void
4042lem_init_manageability(struct adapter *adapter)
4043{
4044	/* A shared code workaround */
4045	if (adapter->has_manage) {
4046		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4047		/* disable hardware interception of ARP */
4048		manc &= ~(E1000_MANC_ARP_EN);
4049		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4050	}
4051}
4052
4053/*
4054 * Give control back to hardware management
4055 * controller if there is one.
4056 */
4057static void
4058lem_release_manageability(struct adapter *adapter)
4059{
4060	if (adapter->has_manage) {
4061		int manc = E1000_READ_REG(&adapter->hw, E1000_MANC);
4062
4063		/* re-enable hardware interception of ARP */
4064		manc |= E1000_MANC_ARP_EN;
4065		E1000_WRITE_REG(&adapter->hw, E1000_MANC, manc);
4066	}
4067}
4068
4069/*
4070 * lem_get_hw_control sets the {CTRL_EXT|FWSM}:DRV_LOAD bit.
4071 * For ASF and Pass Through versions of f/w this means
4072 * that the driver is loaded. For AMT version type f/w
4073 * this means that the network i/f is open.
4074 */
4075static void
4076lem_get_hw_control(struct adapter *adapter)
4077{
4078	u32 ctrl_ext;
4079
4080	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4081	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4082	    ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
4083	return;
4084}
4085
4086/*
4087 * lem_release_hw_control resets {CTRL_EXT|FWSM}:DRV_LOAD bit.
4088 * For ASF and Pass Through versions of f/w this means that
4089 * the driver is no longer loaded. For AMT versions of the
4090 * f/w this means that the network i/f is closed.
4091 */
4092static void
4093lem_release_hw_control(struct adapter *adapter)
4094{
4095	u32 ctrl_ext;
4096
4097	if (!adapter->has_manage)
4098		return;
4099
4100	ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
4101	E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT,
4102	    ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
4103	return;
4104}
4105
4106static int
4107lem_is_valid_ether_addr(u8 *addr)
4108{
4109	char zero_addr[6] = { 0, 0, 0, 0, 0, 0 };
4110
4111	if ((addr[0] & 1) || (!bcmp(addr, zero_addr, ETHER_ADDR_LEN))) {
4112		return (FALSE);
4113	}
4114
4115	return (TRUE);
4116}
4117
4118/*
4119** Parse the interface capabilities with regard
4120** to both system management and wake-on-lan for
4121** later use.
4122*/
static void
lem_get_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	u16		eeprom_data = 0, device_id, apme_mask;

	/* Cache whether firmware management pass-through is enabled. */
	adapter->has_manage = e1000_enable_mng_pass_thru(&adapter->hw);
	apme_mask = EM_EEPROM_APME;

	/*
	 * Pick the NVM word (and the APME bit within it) that holds the
	 * wake-on-lan setting for this MAC type / port.
	 */
	switch (adapter->hw.mac.type) {
	case e1000_82542:
	case e1000_82543:
		/* Nothing read: eeprom_data stays 0, so wol stays unset. */
		break;
	case e1000_82544:
		/* 82544 keeps the flag in INIT_CONTROL2, different bit. */
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL2_REG, 1, &eeprom_data);
		apme_mask = EM_82544_APME;
		break;
	case e1000_82546:
	case e1000_82546_rev_3:
		/* Dual-port parts: each PCI function has its own word. */
		if (adapter->hw.bus.func == 1) {
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
			break;
		} else
			e1000_read_nvm(&adapter->hw,
			    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	default:
		e1000_read_nvm(&adapter->hw,
		    NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
		break;
	}
	/* APME set in NVM: default to magic-packet and multicast wake. */
	if (eeprom_data & apme_mask)
		adapter->wol = (E1000_WUFC_MAG | E1000_WUFC_MC);
	/*
         * We have the eeprom settings, now apply the special cases
         * where the eeprom may be wrong or the board won't support
         * wake on lan on a particular port
	 */
	device_id = pci_get_device(dev);
        switch (device_id) {
	case E1000_DEV_ID_82546GB_PCIE:
		adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546EB_FIBER:
	case E1000_DEV_ID_82546GB_FIBER:
		/* Wake events only supported on port A for dual fiber
		 * regardless of eeprom setting */
		if (E1000_READ_REG(&adapter->hw, E1000_STATUS) &
		    E1000_STATUS_FUNC_1)
			adapter->wol = 0;
		break;
	case E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3:
                /* if quad port adapter, disable WoL on all but port A */
		if (global_quad_port_a != 0)
			adapter->wol = 0;
		/* Reset for multiple quad port adapters */
		if (++global_quad_port_a == 4)
			global_quad_port_a = 0;
                break;
	}
	return;
}
4187
4188
4189/*
4190 * Enable PCI Wake On Lan capability
4191 */
static void
lem_enable_wakeup(device_t dev)
{
	struct adapter	*adapter = device_get_softc(dev);
	if_t ifp = adapter->ifp;
	u32		pmc, ctrl, ctrl_ext, rctl;
	u16     	status;

	/* Bail if the device has no PCI power-management capability. */
	if ((pci_find_cap(dev, PCIY_PMG, &pmc) != 0))
		return;

	/* Advertise the wakeup capability */
	ctrl = E1000_READ_REG(&adapter->hw, E1000_CTRL);
	ctrl |= (E1000_CTRL_SWDPIN2 | E1000_CTRL_SWDPIN3);
	E1000_WRITE_REG(&adapter->hw, E1000_CTRL, ctrl);
	/* NOTE(review): WUC is written again below for the non-pchlan
	 * path with the same value -- confirm this first write is
	 * required for the pchlan path or is simply redundant. */
	E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);

	/* Keep the laser running on Fiber adapters */
	if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
	    adapter->hw.phy.media_type == e1000_media_type_internal_serdes) {
		ctrl_ext = E1000_READ_REG(&adapter->hw, E1000_CTRL_EXT);
		ctrl_ext |= E1000_CTRL_EXT_SDP3_DATA;
		E1000_WRITE_REG(&adapter->hw, E1000_CTRL_EXT, ctrl_ext);
	}

	/*
	** Determine type of Wakeup: note that wol
	** is set with all bits on by default.
	*/
	if ((if_getcapenable(ifp) & IFCAP_WOL_MAGIC) == 0)
		adapter->wol &= ~E1000_WUFC_MAG;

	if ((if_getcapenable(ifp) & IFCAP_WOL_MCAST) == 0)
		adapter->wol &= ~E1000_WUFC_MC;
	else {
		/* Multicast wake needs promiscuous multicast reception. */
		rctl = E1000_READ_REG(&adapter->hw, E1000_RCTL);
		rctl |= E1000_RCTL_MPE;
		E1000_WRITE_REG(&adapter->hw, E1000_RCTL, rctl);
	}

	/* pchlan parts route wakeup through the PHY; others use WUC/WUFC. */
	if (adapter->hw.mac.type == e1000_pchlan) {
		if (lem_enable_phy_wakeup(adapter))
			return;
	} else {
		E1000_WRITE_REG(&adapter->hw, E1000_WUC, E1000_WUC_PME_EN);
		E1000_WRITE_REG(&adapter->hw, E1000_WUFC, adapter->wol);
	}


        /* Request PME */
        status = pci_read_config(dev, pmc + PCIR_POWER_STATUS, 2);
	status &= ~(PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE);
	if (if_getcapenable(ifp) & IFCAP_WOL)
		status |= PCIM_PSTAT_PME | PCIM_PSTAT_PMEENABLE;
        pci_write_config(dev, pmc + PCIR_POWER_STATUS, status, 2);

	return;
}
4250
4251/*
4252** WOL in the newer chipset interfaces (pchlan)
4253** require thing to be copied into the phy
4254*/
static int
lem_enable_phy_wakeup(struct adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mreg, ret = 0;
	u16 preg;

	/* copy MAC RARs to PHY RARs */
	for (int i = 0; i < adapter->hw.mac.rar_entry_count; i++) {
		mreg = E1000_READ_REG(hw, E1000_RAL(i));
		e1000_write_phy_reg(hw, BM_RAR_L(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_RAR_M(i),
		    (u16)((mreg >> 16) & 0xFFFF));
		mreg = E1000_READ_REG(hw, E1000_RAH(i));
		e1000_write_phy_reg(hw, BM_RAR_H(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_RAR_CTRL(i),
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* copy MAC MTA to PHY MTA */
	for (int i = 0; i < adapter->hw.mac.mta_reg_count; i++) {
		mreg = E1000_READ_REG_ARRAY(hw, E1000_MTA, i);
		e1000_write_phy_reg(hw, BM_MTA(i), (u16)(mreg & 0xFFFF));
		e1000_write_phy_reg(hw, BM_MTA(i) + 1,
		    (u16)((mreg >> 16) & 0xFFFF));
	}

	/* configure PHY Rx Control register: mirror each relevant MAC
	 * RCTL/CTRL bit into its BM_RCTL counterpart */
	e1000_read_phy_reg(&adapter->hw, BM_RCTL, &preg);
	mreg = E1000_READ_REG(hw, E1000_RCTL);
	if (mreg & E1000_RCTL_UPE)
		preg |= BM_RCTL_UPE;
	if (mreg & E1000_RCTL_MPE)
		preg |= BM_RCTL_MPE;
	preg &= ~(BM_RCTL_MO_MASK);
	if (mreg & E1000_RCTL_MO_3)
		preg |= (((mreg & E1000_RCTL_MO_3) >> E1000_RCTL_MO_SHIFT)
				<< BM_RCTL_MO_SHIFT);
	if (mreg & E1000_RCTL_BAM)
		preg |= BM_RCTL_BAM;
	if (mreg & E1000_RCTL_PMCF)
		preg |= BM_RCTL_PMCF;
	mreg = E1000_READ_REG(hw, E1000_CTRL);
	if (mreg & E1000_CTRL_RFCE)
		preg |= BM_RCTL_RFCE;
	e1000_write_phy_reg(&adapter->hw, BM_RCTL, preg);

	/* enable PHY wakeup in MAC register */
	E1000_WRITE_REG(hw, E1000_WUC,
	    E1000_WUC_PHY_WAKE | E1000_WUC_PME_EN);
	E1000_WRITE_REG(hw, E1000_WUFC, adapter->wol);

	/* configure and enable PHY wakeup in PHY registers */
	e1000_write_phy_reg(&adapter->hw, BM_WUFC, adapter->wol);
	e1000_write_phy_reg(&adapter->hw, BM_WUC, E1000_WUC_PME_EN);

	/* activate PHY wakeup: the BM_WUC_ENABLE page must be accessed
	 * with the PHY semaphore held and raw mdic reads/writes */
	ret = hw->phy.ops.acquire(hw);
	if (ret) {
		printf("Could not acquire PHY\n");
		return ret;
	}
	e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
	                         (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
	ret = e1000_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &preg);
	if (ret) {
		printf("Could not read PHY page 769\n");
		goto out;
	}
	preg |= BM_WUC_ENABLE_BIT | BM_WUC_HOST_WU_BIT;
	ret = e1000_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, preg);
	if (ret)
		printf("Could not set PHY Host Wakeup bit\n");
out:
	/* always release the PHY semaphore taken above */
	hw->phy.ops.release(hw);

	return ret;
}
4333
4334static void
4335lem_led_func(void *arg, int onoff)
4336{
4337	struct adapter	*adapter = arg;
4338
4339	EM_CORE_LOCK(adapter);
4340	if (onoff) {
4341		e1000_setup_led(&adapter->hw);
4342		e1000_led_on(&adapter->hw);
4343	} else {
4344		e1000_led_off(&adapter->hw);
4345		e1000_cleanup_led(&adapter->hw);
4346	}
4347	EM_CORE_UNLOCK(adapter);
4348}
4349
4350/*********************************************************************
4351* 82544 Coexistence issue workaround.
4352*    There are 2 issues.
4353*       1. Transmit Hang issue.
4354*    To detect this issue, following equation can be used...
4355*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4356*	  If SUM[3:0] is in between 1 to 4, we will have this issue.
4357*
4358*       2. DAC issue.
4359*    To detect this issue, following equation can be used...
4360*	  SIZE[3:0] + ADDR[2:0] = SUM[3:0].
4361*	  If SUM[3:0] is in between 9 to c, we will have this issue.
4362*
4363*
4364*    WORKAROUND:
4365*	  Make sure we do not have ending address
4366*	  as 1,2,3,4(Hang) or 9,a,b,c (DAC)
4367*
4368*************************************************************************/
4369static u32
4370lem_fill_descriptors (bus_addr_t address, u32 length,
4371		PDESC_ARRAY desc_array)
4372{
4373	u32 safe_terminator;
4374
4375	/* Since issue is sensitive to length and address.*/
4376	/* Let us first check the address...*/
4377	if (length <= 4) {
4378		desc_array->descriptor[0].address = address;
4379		desc_array->descriptor[0].length = length;
4380		desc_array->elements = 1;
4381		return (desc_array->elements);
4382	}
4383	safe_terminator = (u32)((((u32)address & 0x7) +
4384	    (length & 0xF)) & 0xF);
4385	/* if it does not fall between 0x1 to 0x4 and 0x9 to 0xC then return */
4386	if (safe_terminator == 0   ||
4387	(safe_terminator > 4   &&
4388	safe_terminator < 9)   ||
4389	(safe_terminator > 0xC &&
4390	safe_terminator <= 0xF)) {
4391		desc_array->descriptor[0].address = address;
4392		desc_array->descriptor[0].length = length;
4393		desc_array->elements = 1;
4394		return (desc_array->elements);
4395	}
4396
4397	desc_array->descriptor[0].address = address;
4398	desc_array->descriptor[0].length = length - 4;
4399	desc_array->descriptor[1].address = address + (length - 4);
4400	desc_array->descriptor[1].length = 4;
4401	desc_array->elements = 2;
4402	return (desc_array->elements);
4403}
4404
4405/**********************************************************************
4406 *
4407 *  Update the board statistics counters.
4408 *
4409 **********************************************************************/
/* Accumulate the hardware statistics registers into adapter->stats. */
static void
lem_update_stats_counters(struct adapter *adapter)
{

	/* Symbol/sequence error counters are gathered only for copper
	 * media, or when the link is up (E1000_STATUS_LU). */
	if(adapter->hw.phy.media_type == e1000_media_type_copper ||
	   (E1000_READ_REG(&adapter->hw, E1000_STATUS) & E1000_STATUS_LU)) {
		adapter->stats.symerrs += E1000_READ_REG(&adapter->hw, E1000_SYMERRS);
		adapter->stats.sec += E1000_READ_REG(&adapter->hw, E1000_SEC);
	}
	adapter->stats.crcerrs += E1000_READ_REG(&adapter->hw, E1000_CRCERRS);
	adapter->stats.mpc += E1000_READ_REG(&adapter->hw, E1000_MPC);
	adapter->stats.scc += E1000_READ_REG(&adapter->hw, E1000_SCC);
	adapter->stats.ecol += E1000_READ_REG(&adapter->hw, E1000_ECOL);

	adapter->stats.mcc += E1000_READ_REG(&adapter->hw, E1000_MCC);
	adapter->stats.latecol += E1000_READ_REG(&adapter->hw, E1000_LATECOL);
	adapter->stats.colc += E1000_READ_REG(&adapter->hw, E1000_COLC);
	adapter->stats.dc += E1000_READ_REG(&adapter->hw, E1000_DC);
	adapter->stats.rlec += E1000_READ_REG(&adapter->hw, E1000_RLEC);
	adapter->stats.xonrxc += E1000_READ_REG(&adapter->hw, E1000_XONRXC);
	adapter->stats.xontxc += E1000_READ_REG(&adapter->hw, E1000_XONTXC);
	adapter->stats.xoffrxc += E1000_READ_REG(&adapter->hw, E1000_XOFFRXC);
	adapter->stats.xofftxc += E1000_READ_REG(&adapter->hw, E1000_XOFFTXC);
	adapter->stats.fcruc += E1000_READ_REG(&adapter->hw, E1000_FCRUC);
	adapter->stats.prc64 += E1000_READ_REG(&adapter->hw, E1000_PRC64);
	adapter->stats.prc127 += E1000_READ_REG(&adapter->hw, E1000_PRC127);
	adapter->stats.prc255 += E1000_READ_REG(&adapter->hw, E1000_PRC255);
	adapter->stats.prc511 += E1000_READ_REG(&adapter->hw, E1000_PRC511);
	adapter->stats.prc1023 += E1000_READ_REG(&adapter->hw, E1000_PRC1023);
	adapter->stats.prc1522 += E1000_READ_REG(&adapter->hw, E1000_PRC1522);
	adapter->stats.gprc += E1000_READ_REG(&adapter->hw, E1000_GPRC);
	adapter->stats.bprc += E1000_READ_REG(&adapter->hw, E1000_BPRC);
	adapter->stats.mprc += E1000_READ_REG(&adapter->hw, E1000_MPRC);
	adapter->stats.gptc += E1000_READ_REG(&adapter->hw, E1000_GPTC);

	/* For the 64-bit byte counters the low dword must be read first. */
	/* Both registers clear on the read of the high dword */

	adapter->stats.gorc += E1000_READ_REG(&adapter->hw, E1000_GORCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GORCH) << 32);
	adapter->stats.gotc += E1000_READ_REG(&adapter->hw, E1000_GOTCL) +
	    ((u64)E1000_READ_REG(&adapter->hw, E1000_GOTCH) << 32);

	adapter->stats.rnbc += E1000_READ_REG(&adapter->hw, E1000_RNBC);
	adapter->stats.ruc += E1000_READ_REG(&adapter->hw, E1000_RUC);
	adapter->stats.rfc += E1000_READ_REG(&adapter->hw, E1000_RFC);
	adapter->stats.roc += E1000_READ_REG(&adapter->hw, E1000_ROC);
	adapter->stats.rjc += E1000_READ_REG(&adapter->hw, E1000_RJC);

	/* NOTE(review): only the high dwords (TORH/TOTH) are read here;
	 * the low dwords (TORL/TOTL) are never read, contrary to the
	 * read-low-first rule stated above -- confirm this is intended. */
	adapter->stats.tor += E1000_READ_REG(&adapter->hw, E1000_TORH);
	adapter->stats.tot += E1000_READ_REG(&adapter->hw, E1000_TOTH);

	adapter->stats.tpr += E1000_READ_REG(&adapter->hw, E1000_TPR);
	adapter->stats.tpt += E1000_READ_REG(&adapter->hw, E1000_TPT);
	adapter->stats.ptc64 += E1000_READ_REG(&adapter->hw, E1000_PTC64);
	adapter->stats.ptc127 += E1000_READ_REG(&adapter->hw, E1000_PTC127);
	adapter->stats.ptc255 += E1000_READ_REG(&adapter->hw, E1000_PTC255);
	adapter->stats.ptc511 += E1000_READ_REG(&adapter->hw, E1000_PTC511);
	adapter->stats.ptc1023 += E1000_READ_REG(&adapter->hw, E1000_PTC1023);
	adapter->stats.ptc1522 += E1000_READ_REG(&adapter->hw, E1000_PTC1522);
	adapter->stats.mptc += E1000_READ_REG(&adapter->hw, E1000_MPTC);
	adapter->stats.bptc += E1000_READ_REG(&adapter->hw, E1000_BPTC);

	/* Counters that exist only on 82543 and newer MACs. */
	if (adapter->hw.mac.type >= e1000_82543) {
		adapter->stats.algnerrc +=
		E1000_READ_REG(&adapter->hw, E1000_ALGNERRC);
		adapter->stats.rxerrc +=
		E1000_READ_REG(&adapter->hw, E1000_RXERRC);
		adapter->stats.tncrs +=
		E1000_READ_REG(&adapter->hw, E1000_TNCRS);
		adapter->stats.cexterr +=
		E1000_READ_REG(&adapter->hw, E1000_CEXTERR);
		adapter->stats.tsctc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTC);
		adapter->stats.tsctfc +=
		E1000_READ_REG(&adapter->hw, E1000_TSCTFC);
	}
}
4488
4489static uint64_t
4490lem_get_counter(if_t ifp, ift_counter cnt)
4491{
4492	struct adapter *adapter;
4493
4494	adapter = if_getsoftc(ifp);
4495
4496	switch (cnt) {
4497	case IFCOUNTER_COLLISIONS:
4498		return (adapter->stats.colc);
4499	case IFCOUNTER_IERRORS:
4500		return (adapter->dropped_pkts + adapter->stats.rxerrc +
4501		    adapter->stats.crcerrs + adapter->stats.algnerrc +
4502		    adapter->stats.ruc + adapter->stats.roc +
4503		    adapter->stats.mpc + adapter->stats.cexterr);
4504	case IFCOUNTER_OERRORS:
4505		return (adapter->stats.ecol + adapter->stats.latecol +
4506		    adapter->watchdog_events);
4507	default:
4508		return (if_get_counter_default(ifp, cnt));
4509	}
4510}
4511
4512/* Export a single 32-bit register via a read-only sysctl. */
4513static int
4514lem_sysctl_reg_handler(SYSCTL_HANDLER_ARGS)
4515{
4516	struct adapter *adapter;
4517	u_int val;
4518
4519	adapter = oidp->oid_arg1;
4520	val = E1000_READ_REG(&adapter->hw, oidp->oid_arg2);
4521	return (sysctl_handle_int(oidp, &val, 0, req));
4522}
4523
4524/*
4525 * Add sysctl variables, one per statistic, to the system.
4526 */
4527static void
4528lem_add_hw_stats(struct adapter *adapter)
4529{
4530	device_t dev = adapter->dev;
4531
4532	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
4533	struct sysctl_oid *tree = device_get_sysctl_tree(dev);
4534	struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
4535	struct e1000_hw_stats *stats = &adapter->stats;
4536
4537	struct sysctl_oid *stat_node;
4538	struct sysctl_oid_list *stat_list;
4539
4540	/* Driver Statistics */
4541	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "cluster_alloc_fail",
4542			 CTLFLAG_RD, &adapter->mbuf_cluster_failed,
4543			 "Std mbuf cluster failed");
4544	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_fail",
4545			 CTLFLAG_RD, &adapter->mbuf_defrag_failed,
4546			 "Defragmenting mbuf chain failed");
4547	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped",
4548			CTLFLAG_RD, &adapter->dropped_pkts,
4549			"Driver dropped packets");
4550	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_dma_fail",
4551			CTLFLAG_RD, &adapter->no_tx_dma_setup,
4552			"Driver tx dma failure in xmit");
4553	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail1",
4554			CTLFLAG_RD, &adapter->no_tx_desc_avail1,
4555			"Not enough tx descriptors failure in xmit");
4556	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_desc_fail2",
4557			CTLFLAG_RD, &adapter->no_tx_desc_avail2,
4558			"Not enough tx descriptors failure in xmit");
4559	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_overruns",
4560			CTLFLAG_RD, &adapter->rx_overruns,
4561			"RX overruns");
4562	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_timeouts",
4563			CTLFLAG_RD, &adapter->watchdog_events,
4564			"Watchdog timeouts");
4565
4566	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "device_control",
4567			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_CTRL,
4568			lem_sysctl_reg_handler, "IU",
4569			"Device Control Register");
4570	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_control",
4571			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RCTL,
4572			lem_sysctl_reg_handler, "IU",
4573			"Receiver Control Register");
4574	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_high_water",
4575			CTLFLAG_RD, &adapter->hw.fc.high_water, 0,
4576			"Flow Control High Watermark");
4577	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "fc_low_water",
4578			CTLFLAG_RD, &adapter->hw.fc.low_water, 0,
4579			"Flow Control Low Watermark");
4580	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_workaround",
4581			CTLFLAG_RD, &adapter->tx_fifo_wrk_cnt,
4582			"TX FIFO workaround events");
4583	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "fifo_reset",
4584			CTLFLAG_RD, &adapter->tx_fifo_reset_cnt,
4585			"TX FIFO resets");
4586
4587	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_head",
4588			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDH(0),
4589			lem_sysctl_reg_handler, "IU",
4590 			"Transmit Descriptor Head");
4591	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txd_tail",
4592			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_TDT(0),
4593			lem_sysctl_reg_handler, "IU",
4594 			"Transmit Descriptor Tail");
4595	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_head",
4596			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDH(0),
4597			lem_sysctl_reg_handler, "IU",
4598			"Receive Descriptor Head");
4599	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxd_tail",
4600			CTLTYPE_UINT | CTLFLAG_RD, adapter, E1000_RDT(0),
4601			lem_sysctl_reg_handler, "IU",
4602			"Receive Descriptor Tail");
4603
4604
4605	/* MAC stats get their own sub node */
4606
4607	stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats",
4608				    CTLFLAG_RD, NULL, "Statistics");
4609	stat_list = SYSCTL_CHILDREN(stat_node);
4610
4611	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "excess_coll",
4612			CTLFLAG_RD, &stats->ecol,
4613			"Excessive collisions");
4614	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "single_coll",
4615			CTLFLAG_RD, &stats->scc,
4616			"Single collisions");
4617	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "multiple_coll",
4618			CTLFLAG_RD, &stats->mcc,
4619			"Multiple collisions");
4620	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "late_coll",
4621			CTLFLAG_RD, &stats->latecol,
4622			"Late collisions");
4623	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "collision_count",
4624			CTLFLAG_RD, &stats->colc,
4625			"Collision Count");
4626	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "symbol_errors",
4627			CTLFLAG_RD, &adapter->stats.symerrs,
4628			"Symbol Errors");
4629	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "sequence_errors",
4630			CTLFLAG_RD, &adapter->stats.sec,
4631			"Sequence Errors");
4632	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "defer_count",
4633			CTLFLAG_RD, &adapter->stats.dc,
4634			"Defer Count");
4635	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "missed_packets",
4636			CTLFLAG_RD, &adapter->stats.mpc,
4637			"Missed Packets");
4638	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_no_buff",
4639			CTLFLAG_RD, &adapter->stats.rnbc,
4640			"Receive No Buffers");
4641	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersize",
4642			CTLFLAG_RD, &adapter->stats.ruc,
4643			"Receive Undersize");
4644	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented",
4645			CTLFLAG_RD, &adapter->stats.rfc,
4646			"Fragmented Packets Received ");
4647	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversize",
4648			CTLFLAG_RD, &adapter->stats.roc,
4649			"Oversized Packets Received");
4650	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabber",
4651			CTLFLAG_RD, &adapter->stats.rjc,
4652			"Recevied Jabber");
4653	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_errs",
4654			CTLFLAG_RD, &adapter->stats.rxerrc,
4655			"Receive Errors");
4656	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs",
4657			CTLFLAG_RD, &adapter->stats.crcerrs,
4658			"CRC errors");
4659	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "alignment_errs",
4660			CTLFLAG_RD, &adapter->stats.algnerrc,
4661			"Alignment Errors");
4662	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "coll_ext_errs",
4663			CTLFLAG_RD, &adapter->stats.cexterr,
4664			"Collision/Carrier extension errors");
4665	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd",
4666			CTLFLAG_RD, &adapter->stats.xonrxc,
4667			"XON Received");
4668	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd",
4669			CTLFLAG_RD, &adapter->stats.xontxc,
4670			"XON Transmitted");
4671	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd",
4672			CTLFLAG_RD, &adapter->stats.xoffrxc,
4673			"XOFF Received");
4674	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd",
4675			CTLFLAG_RD, &adapter->stats.xofftxc,
4676			"XOFF Transmitted");
4677
4678	/* Packet Reception Stats */
4679	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_recvd",
4680			CTLFLAG_RD, &adapter->stats.tpr,
4681			"Total Packets Received ");
4682	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_recvd",
4683			CTLFLAG_RD, &adapter->stats.gprc,
4684			"Good Packets Received");
4685	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_recvd",
4686			CTLFLAG_RD, &adapter->stats.bprc,
4687			"Broadcast Packets Received");
4688	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_recvd",
4689			CTLFLAG_RD, &adapter->stats.mprc,
4690			"Multicast Packets Received");
4691	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64",
4692			CTLFLAG_RD, &adapter->stats.prc64,
4693			"64 byte frames received ");
4694	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127",
4695			CTLFLAG_RD, &adapter->stats.prc127,
4696			"65-127 byte frames received");
4697	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255",
4698			CTLFLAG_RD, &adapter->stats.prc255,
4699			"128-255 byte frames received");
4700	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511",
4701			CTLFLAG_RD, &adapter->stats.prc511,
4702			"256-511 byte frames received");
4703	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023",
4704			CTLFLAG_RD, &adapter->stats.prc1023,
4705			"512-1023 byte frames received");
4706	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522",
4707			CTLFLAG_RD, &adapter->stats.prc1522,
4708			"1023-1522 byte frames received");
4709 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_recvd",
4710 			CTLFLAG_RD, &adapter->stats.gorc,
4711 			"Good Octets Received");
4712
4713	/* Packet Transmission Stats */
4714 	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd",
4715 			CTLFLAG_RD, &adapter->stats.gotc,
4716 			"Good Octets Transmitted");
4717	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd",
4718			CTLFLAG_RD, &adapter->stats.tpt,
4719			"Total Packets Transmitted");
4720	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd",
4721			CTLFLAG_RD, &adapter->stats.gptc,
4722			"Good Packets Transmitted");
4723	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd",
4724			CTLFLAG_RD, &adapter->stats.bptc,
4725			"Broadcast Packets Transmitted");
4726	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd",
4727			CTLFLAG_RD, &adapter->stats.mptc,
4728			"Multicast Packets Transmitted");
4729	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64",
4730			CTLFLAG_RD, &adapter->stats.ptc64,
4731			"64 byte frames transmitted ");
4732	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127",
4733			CTLFLAG_RD, &adapter->stats.ptc127,
4734			"65-127 byte frames transmitted");
4735	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255",
4736			CTLFLAG_RD, &adapter->stats.ptc255,
4737			"128-255 byte frames transmitted");
4738	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511",
4739			CTLFLAG_RD, &adapter->stats.ptc511,
4740			"256-511 byte frames transmitted");
4741	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023",
4742			CTLFLAG_RD, &adapter->stats.ptc1023,
4743			"512-1023 byte frames transmitted");
4744	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522",
4745			CTLFLAG_RD, &adapter->stats.ptc1522,
4746			"1024-1522 byte frames transmitted");
4747	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_txd",
4748			CTLFLAG_RD, &adapter->stats.tsctc,
4749			"TSO Contexts Transmitted");
4750	SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tso_ctx_fail",
4751			CTLFLAG_RD, &adapter->stats.tsctfc,
4752			"TSO Contexts Failed");
4753}
4754
4755/**********************************************************************
4756 *
4757 *  This routine provides a way to dump out the adapter eeprom,
4758 *  often a useful debug/service tool. This only dumps the first
4759 *  32 words, stuff that matters is in that extent.
4760 *
4761 **********************************************************************/
4762
4763static int
4764lem_sysctl_nvm_info(SYSCTL_HANDLER_ARGS)
4765{
4766	struct adapter *adapter;
4767	int error;
4768	int result;
4769
4770	result = -1;
4771	error = sysctl_handle_int(oidp, &result, 0, req);
4772
4773	if (error || !req->newptr)
4774		return (error);
4775
4776	/*
4777	 * This value will cause a hex dump of the
4778	 * first 32 16-bit words of the EEPROM to
4779	 * the screen.
4780	 */
4781	if (result == 1) {
4782		adapter = (struct adapter *)arg1;
4783		lem_print_nvm_info(adapter);
4784        }
4785
4786	return (error);
4787}
4788
4789static void
4790lem_print_nvm_info(struct adapter *adapter)
4791{
4792	u16	eeprom_data;
4793	int	i, j, row = 0;
4794
4795	/* Its a bit crude, but it gets the job done */
4796	printf("\nInterface EEPROM Dump:\n");
4797	printf("Offset\n0x0000  ");
4798	for (i = 0, j = 0; i < 32; i++, j++) {
4799		if (j == 8) { /* Make the offset block */
4800			j = 0; ++row;
4801			printf("\n0x00%x0  ",row);
4802		}
4803		e1000_read_nvm(&adapter->hw, i, 1, &eeprom_data);
4804		printf("%04x ", eeprom_data);
4805	}
4806	printf("\n");
4807}
4808
4809static int
4810lem_sysctl_int_delay(SYSCTL_HANDLER_ARGS)
4811{
4812	struct em_int_delay_info *info;
4813	struct adapter *adapter;
4814	u32 regval;
4815	int error;
4816	int usecs;
4817	int ticks;
4818
4819	info = (struct em_int_delay_info *)arg1;
4820	usecs = info->value;
4821	error = sysctl_handle_int(oidp, &usecs, 0, req);
4822	if (error != 0 || req->newptr == NULL)
4823		return (error);
4824	if (usecs < 0 || usecs > EM_TICKS_TO_USECS(65535))
4825		return (EINVAL);
4826	info->value = usecs;
4827	ticks = EM_USECS_TO_TICKS(usecs);
4828	if (info->offset == E1000_ITR)	/* units are 256ns here */
4829		ticks *= 4;
4830
4831	adapter = info->adapter;
4832
4833	EM_CORE_LOCK(adapter);
4834	regval = E1000_READ_OFFSET(&adapter->hw, info->offset);
4835	regval = (regval & ~0xffff) | (ticks & 0xffff);
4836	/* Handle a few special cases. */
4837	switch (info->offset) {
4838	case E1000_RDTR:
4839		break;
4840	case E1000_TIDV:
4841		if (ticks == 0) {
4842			adapter->txd_cmd &= ~E1000_TXD_CMD_IDE;
4843			/* Don't write 0 into the TIDV register. */
4844			regval++;
4845		} else
4846			adapter->txd_cmd |= E1000_TXD_CMD_IDE;
4847		break;
4848	}
4849	E1000_WRITE_OFFSET(&adapter->hw, info->offset, regval);
4850	EM_CORE_UNLOCK(adapter);
4851	return (0);
4852}
4853
4854static void
4855lem_add_int_delay_sysctl(struct adapter *adapter, const char *name,
4856	const char *description, struct em_int_delay_info *info,
4857	int offset, int value)
4858{
4859	info->adapter = adapter;
4860	info->offset = offset;
4861	info->value = value;
4862	SYSCTL_ADD_PROC(device_get_sysctl_ctx(adapter->dev),
4863	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4864	    OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW,
4865	    info, 0, lem_sysctl_int_delay, "I", description);
4866}
4867
4868static void
4869lem_set_flow_cntrl(struct adapter *adapter, const char *name,
4870        const char *description, int *limit, int value)
4871{
4872	*limit = value;
4873	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4874	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4875	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4876}
4877
4878static void
4879lem_add_rx_process_limit(struct adapter *adapter, const char *name,
4880	const char *description, int *limit, int value)
4881{
4882	*limit = value;
4883	SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev),
4884	    SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)),
4885	    OID_AUTO, name, CTLFLAG_RW, limit, value, description);
4886}
4887