From: kettenis
Date: Mon, 12 May 2014 19:29:16 +0000 (+0000)
Subject: Move GTT management for Sandy Bridge and up into inteldrm(4). This makes
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=b2a79596449478222a61a2218324f4d2701f2f46;p=openbsd

Move GTT management for Sandy Bridge and up into inteldrm(4). This makes
it possible to use the non-mappable part of the GTT, prepares the way for
using the PPGTT and reduces the diffs with Linux.

ok jsg@
---

diff --git a/sys/dev/pci/agp_i810.c b/sys/dev/pci/agp_i810.c
index 94a60ebe0d3..8ba5968e236 100644
--- a/sys/dev/pci/agp_i810.c
+++ b/sys/dev/pci/agp_i810.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: agp_i810.c,v 1.85 2014/03/26 14:41:41 mpi Exp $ */
+/* $OpenBSD: agp_i810.c,v 1.86 2014/05/12 19:29:16 kettenis Exp $ */
 
 /*-
  * Copyright (c) 2000 Doug Rabson
@@ -57,11 +57,6 @@
 /* Memory is snooped, must not be accessed through gtt from the cpu. */
 #define INTEL_COHERENT	0x6
 
-#define GEN6_PTE_UNCACHED		(1 << 1)
-#define HSW_PTE_UNCACHED		(0)
-#define GEN6_PTE_CACHE_LLC		(2 << 1)
-#define GEN6_PTE_CACHE_LLC_MLC		(3 << 1)
-
 enum {
 	CHIP_NONE	= 0,	/* not integrated graphics */
 	CHIP_I810	= 1,	/* i810/i815 */
@@ -73,9 +68,6 @@ enum {
 	CHIP_G4X	= 7,	/* G4X */
 	CHIP_PINEVIEW	= 8,	/* Pineview/Pineview M */
 	CHIP_IRONLAKE	= 9,	/* Clarkdale/Arrandale */
-	CHIP_SANDYBRIDGE=10,	/* Sandybridge */
-	CHIP_IVYBRIDGE	=11,	/* Ivybridge */
-	CHIP_HASWELL	=12,	/* Haswell */
 };
 
 struct agp_i810_softc {
@@ -201,84 +193,6 @@ agp_i810_get_chiptype(struct pci_attach_args *pa)
 	case PCI_PRODUCT_INTEL_ARRANDALE_IGD:
 		return (CHIP_IRONLAKE);
 		break;
-	case PCI_PRODUCT_INTEL_CORE2G_GT1:
-	case PCI_PRODUCT_INTEL_CORE2G_M_GT1:
-	case PCI_PRODUCT_INTEL_CORE2G_S_GT:
-	case PCI_PRODUCT_INTEL_CORE2G_GT2:
-	case PCI_PRODUCT_INTEL_CORE2G_M_GT2:
-	case PCI_PRODUCT_INTEL_CORE2G_GT2_PLUS:
-	case PCI_PRODUCT_INTEL_CORE2G_M_GT2_PLUS:
-		return (CHIP_SANDYBRIDGE);
-		break;
-	case PCI_PRODUCT_INTEL_CORE3G_D_GT1:
-	case PCI_PRODUCT_INTEL_CORE3G_M_GT1:
-	case PCI_PRODUCT_INTEL_CORE3G_S_GT1:
-	case PCI_PRODUCT_INTEL_CORE3G_D_GT2:
-	case PCI_PRODUCT_INTEL_CORE3G_M_GT2:
-	case PCI_PRODUCT_INTEL_CORE3G_S_GT2:
-		return (CHIP_IVYBRIDGE);
-	case PCI_PRODUCT_INTEL_CORE4G_D_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_D_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_S_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_S_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_S_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_M_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_M_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_M_GT2_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT1_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT2_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT3_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT1_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT2_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_GT3_2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT1_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT2_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT3_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT1_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT2_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT3_2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT1_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT2_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT1_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT2_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT1:
-	case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT2:
-	case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT3:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT1_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT2_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT3_1:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT1_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT2_2:
-	case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT3_2:
-		return (CHIP_HASWELL);
-		break;
 	}
 
 	return (CHIP_NONE);
@@ -336,9 +250,6 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux)
 	case CHIP_I965:
 	case CHIP_G4X:
 	case CHIP_IRONLAKE:
-	case CHIP_SANDYBRIDGE:
-	case CHIP_IVYBRIDGE:
-	case CHIP_HASWELL:
 		gmaddr = AGP_I965_GMADR;
 		mmaddr = AGP_I965_MMADR;
 		memtype = PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT;
@@ -576,19 +487,6 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux)
 		gatt->ag_physical = READ4(AGP_I810_PGTBL_CTL) & ~1;
 		break;
 
-	case CHIP_SANDYBRIDGE:
-	case CHIP_IVYBRIDGE:
-	case CHIP_HASWELL:
-		/*
-		 * Even though stolen memory exists on these machines,
-		 * it isn't necessarily mapped into the aperture.
-		 */
-		isc->stolen = 0;
-
-		/* GATT address is already in there, make sure it's enabled */
-		gatt->ag_physical = READ4(AGP_I810_PGTBL_CTL) & ~1;
-		break;
-
 	default:
 		printf(": unknown initialisation\n");
 		return;
@@ -669,34 +567,13 @@ void
 agp_i810_bind_page(void *sc, bus_addr_t offset, paddr_t physical, int flags)
 {
 	struct agp_i810_softc *isc = sc;
+
 	/*
 	 * COHERENT mappings mean set the snoop bit. this should never be
 	 * accessed by the gpu through the gtt.
 	 */
-	switch (isc->chiptype) {
-	case CHIP_SANDYBRIDGE:
-	case CHIP_IVYBRIDGE:
-		if (flags & BUS_DMA_GTT_NOCACHE)
-			physical |= GEN6_PTE_UNCACHED;
-		if (flags & BUS_DMA_GTT_CACHE_LLC)
-			physical |= GEN6_PTE_CACHE_LLC;
-		if (flags & BUS_DMA_GTT_CACHE_LLC_MLC)
-			physical |= GEN6_PTE_CACHE_LLC_MLC;
-		break;
-	case CHIP_HASWELL:
-		if (flags & BUS_DMA_GTT_NOCACHE)
-			physical |= HSW_PTE_UNCACHED;
-		if (flags & BUS_DMA_GTT_CACHE_LLC)
-			physical |= GEN6_PTE_CACHE_LLC;
-		/* Haswell doesn't set L3 this way */
-		if (flags & BUS_DMA_GTT_CACHE_LLC_MLC)
-			physical |= GEN6_PTE_CACHE_LLC;
-		break;
-	default:
-		if (flags & BUS_DMA_COHERENT)
-			physical |= INTEL_COHERENT;
-		break;
-	}
+	if (flags & BUS_DMA_COHERENT)
+		physical |= INTEL_COHERENT;
 
 	intagp_write_gtt(isc, offset - isc->isc_apaddr, physical);
 }
@@ -923,12 +800,6 @@ intagp_write_gtt(struct agp_i810_softc *isc, bus_size_t off, paddr_t v)
 		case CHIP_IRONLAKE:
 			pte |= (v & 0x0000000f00000000ULL) >> 28;
 			break;
-		/* gen6+ can do 40 bit addressing */
-		case CHIP_SANDYBRIDGE:
-		case CHIP_IVYBRIDGE:
-		case CHIP_HASWELL:
-			pte |= (v & 0x000000ff00000000ULL) >> 28;
-			break;
 		}
 	}
 
@@ -947,9 +818,6 @@
 		break;
 	case CHIP_G4X:
 	case CHIP_IRONLAKE:
-	case CHIP_SANDYBRIDGE:
-	case CHIP_IVYBRIDGE:
-	case CHIP_HASWELL:
 		baseoff = AGP_G4X_GTT;
 		break;
 	default:
diff --git a/sys/dev/pci/drm/i915/i915_drv.c b/sys/dev/pci/drm/i915/i915_drv.c
index ab8089e5dea..73447be1311 100644
--- a/sys/dev/pci/drm/i915/i915_drv.c
+++ b/sys/dev/pci/drm/i915/i915_drv.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_drv.c,v 1.65 2014/03/16 03:34:32 jsg Exp $ */
+/* $OpenBSD: i915_drv.c,v 1.66 2014/05/12 19:29:16 kettenis Exp $ */
 /*
  * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
 *
@@ -872,6 +872,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 	const struct drm_pcidev *id_entry;
 	int i;
 	uint16_t pci_device;
+	uint32_t aperture_size;
 
 	id_entry = drm_find_description(PCI_VENDOR(pa->pa_id),
 	    PCI_PRODUCT(pa->pa_id), inteldrm_pciidlist);
@@ -886,6 +887,9 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 
 	printf("\n");
 
+	if (dev_priv->info->gen >= 6)
+		inteldrm_driver.flags &= ~(DRIVER_AGP | DRIVER_AGP_REQUIRE);
+
 	/* All intel chipsets need to be treated as agp, so just pass one */
 	dev_priv->drmdev = drm_attach_pci(&inteldrm_driver, pa, 1, 1, self);
@@ -926,6 +930,8 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 		return;
 	}
 
+	i915_gem_gtt_init(dev);
+
 	intel_irq_init(dev);
 
 	/*
@@ -991,6 +997,9 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 		return;
 	}
 
+	aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
+	dev_priv->mm.gtt_base_addr = dev_priv->mm.gtt->gma_bus_addr;
+
 	intel_pm_init(dev);
 	intel_gt_sanitize(dev);
 	intel_gt_init(dev);
@@ -1000,23 +1009,24 @@
 	intel_setup_gmbus(dev_priv);
 
 	/* XXX would be a lot nicer to get agp info before now */
-	uvm_page_physload(atop(dev->agp->base), atop(dev->agp->base +
-	    dev->agp->info.ai_aperture_size), atop(dev->agp->base),
-	    atop(dev->agp->base + dev->agp->info.ai_aperture_size),
+	uvm_page_physload(atop(dev_priv->mm.gtt_base_addr),
+	    atop(dev_priv->mm.gtt_base_addr + aperture_size),
+	    atop(dev_priv->mm.gtt_base_addr),
+	    atop(dev_priv->mm.gtt_base_addr + aperture_size),
 	    PHYSLOAD_DEVICE);
 	/* array of vm pages that physload introduced. */
-	dev_priv->pgs = PHYS_TO_VM_PAGE(dev->agp->base);
+	dev_priv->pgs = PHYS_TO_VM_PAGE(dev_priv->mm.gtt_base_addr);
 	KASSERT(dev_priv->pgs != NULL);
 	/*
 	 * XXX mark all pages write combining so user mmaps get the right
 	 * bits. We really need a proper MI api for doing this, but for now
 	 * this allows us to use PAT where available.
 	 */
-	for (i = 0; i < atop(dev->agp->info.ai_aperture_size); i++)
+	for (i = 0; i < atop(aperture_size); i++)
 		atomic_setbits_int(&(dev_priv->pgs[i].pg_flags), PG_PMAP_WC);
-	if (agp_init_map(dev_priv->bst, dev->agp->base,
-	    dev->agp->info.ai_aperture_size, BUS_SPACE_MAP_LINEAR |
-	    BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->agph))
+	if (agp_init_map(dev_priv->bst, dev_priv->mm.gtt_base_addr,
+	    aperture_size, BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE,
+	    &dev_priv->agph))
 		panic("can't map aperture"); /* XXX */
diff --git a/sys/dev/pci/drm/i915/i915_drv.h b/sys/dev/pci/drm/i915/i915_drv.h
index acb34f051f8..60537ee53d9 100644
--- a/sys/dev/pci/drm/i915/i915_drv.h
+++ b/sys/dev/pci/drm/i915/i915_drv.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_drv.h,v 1.51 2014/03/25 17:44:39 mpi Exp $ */
+/* $OpenBSD: i915_drv.h,v 1.52 2014/05/12 19:29:16 kettenis Exp $ */
 /* i915_drv.h -- Private header for the I915 driver -*- linux-c -*-
  */
 /*
@@ -44,6 +44,23 @@
 
 #include "acpi.h"
 
+struct intel_gtt {
+	/* Size of memory reserved for graphics by the BIOS */
+	unsigned int stolen_size;
+	/* Total number of gtt entries. */
+	unsigned int gtt_total_entries;
+	/* Part of the gtt that is mappable by the cpu, for those chips where
+	 * this is not the full gtt. */
+	unsigned int gtt_mappable_entries;
+	/* Share the scratch page dma with ppgtts. */
+	bus_addr_t scratch_page_dma;
+	struct drm_dmamem *scratch_page;
+	/* for ppgtt PDE access */
+	bus_space_handle_t gtt;
+	/* needed for ioremap in drm/i915 */
+	bus_addr_t gma_bus_addr;
+};
+
 /* General customization:
  */
@@ -673,6 +690,8 @@ struct inteldrm_softc {
 	unsigned long gtt_mappable_end;
 	unsigned long gtt_end;
 
+	bus_addr_t gtt_base_addr;
+
 	/**
	 * List of objects currently involved in rendering from the
	 * ringbuffer.
@@ -1338,6 +1357,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 			      unsigned long start,
 			      unsigned long mappable_end,
 			      unsigned long end);
+int i915_gem_gtt_init(struct drm_device *dev);
+void i915_gem_gtt_fini(struct drm_device *dev);
 
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
diff --git a/sys/dev/pci/drm/i915/i915_gem.c b/sys/dev/pci/drm/i915/i915_gem.c
index 8b490dc6b35..0e18aa242fb 100644
--- a/sys/dev/pci/drm/i915/i915_gem.c
+++ b/sys/dev/pci/drm/i915/i915_gem.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_gem.c,v 1.72 2014/04/01 20:16:50 kettenis Exp $ */
+/* $OpenBSD: i915_gem.c,v 1.73 2014/05/12 19:29:16 kettenis Exp $ */
 /*
  * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
 *
@@ -1498,7 +1498,7 @@ i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi,
 		if (pps[lcv] == PGO_DONTCARE)
 			continue;
 
-		paddr = dev->agp->base + obj->gtt_offset + offset;
+		paddr = dev_priv->mm.gtt_base_addr + obj->gtt_offset + offset;
 
 		if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
 		    mapprot, PMAP_CANFAIL | mapprot) != 0) {
@@ -4157,11 +4157,12 @@ intel_enable_ppgtt(struct drm_device *dev)
 int
 i915_gem_init(struct drm_device *dev)
 {
+	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long gtt_size, mappable_size;
 	int ret;
 
-	gtt_size = dev->agp->info.ai_aperture_size;
-	mappable_size = dev->agp->info.ai_aperture_size;
+	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
+	mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
 	DRM_LOCK();
 #ifdef notyet
diff --git a/sys/dev/pci/drm/i915/i915_gem_gtt.c b/sys/dev/pci/drm/i915/i915_gem_gtt.c
index 4b634e782b7..a1c1a79845b 100644
--- a/sys/dev/pci/drm/i915/i915_gem_gtt.c
+++ b/sys/dev/pci/drm/i915/i915_gem_gtt.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_gem_gtt.c,v 1.8 2013/12/11 20:31:43 kettenis Exp $ */
+/* $OpenBSD: i915_gem_gtt.c,v 1.9 2014/05/12 19:29:16 kettenis Exp $ */
 /*
  * Copyright © 2010 Daniel Vetter
 *
@@ -31,7 +31,6 @@
 
 typedef uint32_t gtt_pte_t;
 
-#ifdef notyet
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)	((addr) | (((addr) >> 28) & 0xff0))
 
@@ -47,7 +46,7 @@ typedef uint32_t gtt_pte_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)	GEN6_GTT_ADDR_ENCODE(addr)
 
 static inline gtt_pte_t pte_encode(struct drm_device *dev,
-				   dma_addr_t addr,
+				   bus_addr_t addr,
 				   enum i915_cache_level level)
 {
 	gtt_pte_t pte = GEN6_PTE_VALID;
@@ -78,6 +77,8 @@ static inline gtt_pte_t pte_encode(struct drm_device *dev,
 	return pte;
 }
 
+#ifdef notyet
+
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
 				   unsigned first_entry,
@@ -373,7 +374,8 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
 
 }
 
-#if 0
+#ifdef __linux__
+
 static void i915_ggtt_clear_range(struct drm_device *dev,
 				  unsigned first_entry,
 				  unsigned num_entries)
@@ -399,20 +401,43 @@ static void i915_ggtt_clear_range(struct drm_device *dev,
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 }
+
 #else
+
 static void i915_ggtt_clear_range(struct drm_device *dev,
 				  unsigned first_entry,
 				  unsigned num_entries)
 {
-	struct agp_softc *sc = dev->agp->agpdev;
-	bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT);
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	gtt_pte_t scratch_pte;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
 	int i;
 
-	for (i = 0; i < num_entries; i++) {
-		sc->sc_methods->unbind_page(sc->sc_chipc, addr);
-		addr += PAGE_SIZE;
+	if (INTEL_INFO(dev)->gen < 6) {
+		struct agp_softc *sc = dev->agp->agpdev;
+		bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT);
+		int i;
+
+		for (i = 0; i < num_entries; i++) {
+			sc->sc_methods->unbind_page(sc->sc_chipc, addr);
+			addr += PAGE_SIZE;
+		}
+		return;
 	}
+
+	if (WARN(num_entries > max_entries,
+	    "First entry = %d; Num entries = %d (max=%d)\n",
+	    first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
+	for (i = 0; i < num_entries; i++)
+		bus_space_write_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+		    (i + first_entry) * sizeof(gtt_pte_t), scratch_pte);
+	bus_space_read_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+	    first_entry * sizeof(gtt_pte_t));
 }
+
 #endif
 
 void
 i915_gem_restore_gtt_mappings(struct drm_device *dev)
@@ -447,7 +472,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
-#ifdef notyet
+#ifdef __linux__
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -496,9 +522,60 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
+
+#else
+
+/*
+ * Binds an object into the global gtt with the specified cache level. The object
+ * will be accessible to the GPU via commands whose operands reference offsets
+ * within the global GTT as well as accessible by the GPU through the GMADR
+ * mapped BAR (dev_priv->mm.gtt->gtt).
+ */
+static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
+				  enum i915_cache_level level)
+{
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
+	const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+	int page_count = obj->base.size >> PAGE_SHIFT;
+	bus_addr_t addr;
+	int i;
+
+	for (i = 0; i < page_count; i++) {
+		struct vm_page *page = obj->pages[i];
+		addr = VM_PAGE_TO_PHYS(page);
+		bus_space_write_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+		    (i + first_entry) * sizeof(gtt_pte_t),
+		    pte_encode(dev, addr, level));
+	}
+
+	BUG_ON(i > max_entries);
+	BUG_ON(i != obj->base.size / PAGE_SIZE);
+
+	/* XXX: This serves as a posting read to make sure that the PTE has
+	 * actually been updated. There is some concern that even though
+	 * registers and PTEs are within the same BAR that they are potentially
+	 * of NUMA access patterns. Therefore, even with the way we assume
+	 * hardware should work, we must keep this posting read for paranoia.
+	 */
+	if (i != 0)
+		WARN_ON(bus_space_read_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+		    (i + first_entry - 1) * sizeof(gtt_pte_t)) !=
+		    pte_encode(dev, addr, level));
+
+	/* This next bit makes the above posting read even more important. We
+	 * want to flush the TLBs only after we're certain all the PTE updates
+	 * have finished.
+	 */
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
 #endif
 
-#if 0
+#ifdef __linux__
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
@@ -515,40 +592,33 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 	obj->has_global_gtt_mapping = 1;
 }
+
 #else
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 			      enum i915_cache_level cache_level)
 {
 	struct drm_device *dev = obj->base.dev;
-	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-	    0 : BUS_DMA_COHERENT;
-	struct agp_softc *sc = dev->agp->agpdev;
-	bus_addr_t addr = sc->sc_apaddr + obj->gtt_space->start;
-	int page_count = obj->base.size >> PAGE_SHIFT;
-	int i;
-
-	switch (cache_level) {
-	case I915_CACHE_NONE:
-		flags |= BUS_DMA_GTT_NOCACHE;
-		break;
-	case I915_CACHE_LLC:
-		flags |= BUS_DMA_GTT_CACHE_LLC;
-		break;
-	case I915_CACHE_LLC_MLC:
-		flags |= BUS_DMA_GTT_CACHE_LLC_MLC;
-		break;
-	default:
-		BUG();
-	}
-
-	for (i = 0; i < page_count; i++) {
-		sc->sc_methods->bind_page(sc->sc_chipc, addr,
-		    VM_PAGE_TO_PHYS(obj->pages[i]), flags);
-		addr += PAGE_SIZE;
+	if (INTEL_INFO(dev)->gen < 6) {
+		unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+		    0 : BUS_DMA_COHERENT;
+		struct agp_softc *sc = dev->agp->agpdev;
+		bus_addr_t addr = sc->sc_apaddr + obj->gtt_space->start;
+		int page_count = obj->base.size >> PAGE_SHIFT;
+		int i;
+
+		for (i = 0; i < page_count; i++) {
+			sc->sc_methods->bind_page(sc->sc_chipc, addr,
+			    VM_PAGE_TO_PHYS(obj->pages[i]), flags);
+			addr += PAGE_SIZE;
+		}
+	} else {
+		gen6_ggtt_bind_object(obj, cache_level);
 	}
 
 	obj->has_global_gtt_mapping = 1;
 }
+
 #endif
 
 void
 i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
@@ -617,7 +687,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
 	i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE);
 }
 
-#ifdef notyet
+#ifdef __linux__
+
 static int setup_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -654,6 +725,33 @@ static void teardown_scratch_page(struct drm_device *dev)
 	__free_page(dev_priv->mm.gtt->scratch_page);
 }
 
+#else
+
+static int setup_scratch_page(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_dmamem *page;
+
+	page = drm_dmamem_alloc(dev_priv->dmat, PAGE_SIZE, 0, 1, PAGE_SIZE,
+	    BUS_DMA_NOCACHE, 0);
+	if (page == NULL)
+		return -ENOMEM;
+
+	dev_priv->mm.gtt->scratch_page = page;
+	dev_priv->mm.gtt->scratch_page_dma = page->segs[0].ds_addr;
+
+	return 0;
+}
+
+static void teardown_scratch_page(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	drm_dmamem_free(dev_priv->dmat, dev_priv->mm.gtt->scratch_page);
+}
+
+#endif
+
 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
@@ -677,6 +775,8 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
 	return stolen_decoder[snb_gmch_ctl] << 20;
 }
 
+#ifdef __linux__
+
 int i915_gem_gtt_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -777,4 +877,143 @@ void i915_gem_gtt_fini(struct drm_device *dev)
 	intel_gmch_remove();
 	kfree(dev_priv->mm.gtt);
 }
-#endif /* notyet */
+
+#else
+
+static void intel_gmch_remove(void) {};
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bus_addr_t gtt_bus_addr;
+	bus_size_t size;
+	u16 snb_gmch_ctl;
+	int ret;
+
+	/* On modern platforms we need not worry ourself with the legacy
+	 * hostbridge query stuff. Skip it entirely
+	 */
+	if (INTEL_INFO(dev)->gen < 6) {
+#if 0
+		ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
+		if (!ret) {
+			DRM_ERROR("failed to set up gmch\n");
+			return -EIO;
+		}
+#endif
+
+		dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+		if (!dev_priv->mm.gtt) {
+			DRM_ERROR("Failed to initialize GTT\n");
+			intel_gmch_remove();
+			return -ENODEV;
+		}
+		dev_priv->mm.gtt->gtt_mappable_entries =
+		    dev->agp->info.ai_aperture_size >> PAGE_SHIFT;
+		dev_priv->mm.gtt->gtt_total_entries =
+		    dev_priv->mm.gtt->gtt_mappable_entries;
+		dev_priv->mm.gtt->gma_bus_addr = dev->agp->base;
+		return 0;
+	}
+
+	dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+	if (!dev_priv->mm.gtt)
+		return -ENOMEM;
+
+#if 0
+	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
+		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
+#endif
+
+#ifdef CONFIG_INTEL_IOMMU
+	dev_priv->mm.gtt->needs_dmar = 1;
+#endif
+
+	/* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
+	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x10,
+	    PCI_MAPREG_MEM_TYPE_64BIT, &gtt_bus_addr, NULL, NULL);
+	if (ret)
+		goto err_out;
+	gtt_bus_addr += (2<<20);
+
+	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18,
+	    PCI_MAPREG_MEM_TYPE_64BIT, &dev_priv->mm.gtt->gma_bus_addr,
+	    NULL, NULL);
+	if (ret)
+		goto err_out;
+
+	/* i9xx_setup */
+	snb_gmch_ctl = pci_conf_read(dev_priv->pc, dev_priv->tag, SNB_GMCH_CTRL);
+	dev_priv->mm.gtt->gtt_total_entries =
+	    gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
+	if (INTEL_INFO(dev)->gen < 7)
+		dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
+	else
+		dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+
+	ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18,
+	    PCI_MAPREG_MEM_TYPE_64BIT, NULL, &size, NULL);
+	if (ret)
+		goto err_out;
+	dev_priv->mm.gtt->gtt_mappable_entries = size >> PAGE_SHIFT;
+
+	/* 64/512MB is the current min/max we actually know of, but this is just a
+	 * coarse sanity check.
+	 */
+	if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 ||
+	    dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) {
+		DRM_ERROR("Unknown GMADR entries (%d)\n",
+		    dev_priv->mm.gtt->gtt_mappable_entries);
+		ret = -ENXIO;
+		goto err_out;
+	}
+
+	ret = setup_scratch_page(dev);
+	if (ret) {
+		DRM_ERROR("Scratch setup failed\n");
+		goto err_out;
+	}
+
+#if 0
+	if (bus_space_map(dev_priv->bst, gtt_bus_addr,
+	    dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t),
+	    BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->mm.gtt->gtt)) {
+		DRM_ERROR("Failed to map the gtt page table\n");
+		teardown_scratch_page(dev);
+		ret = -ENOMEM;
+		goto err_out;
+	}
+#else
+	if (bus_space_subregion(dev_priv->bst, dev_priv->regs->bsh, (2<<20),
+	    dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t),
+	    &dev_priv->mm.gtt->gtt)) {
+		DRM_ERROR("Failed to map the gtt page table %d\n", ret);
+		teardown_scratch_page(dev);
+		ret = -ENOMEM;
+		goto err_out;
+	}
+#endif
+
+	/* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+	DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
+	DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
+	DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+
+	return 0;
+
+err_out:
+	kfree(dev_priv->mm.gtt);
+	if (INTEL_INFO(dev)->gen < 6)
+		intel_gmch_remove();
+	return ret;
+}
+
+void i915_gem_gtt_fini(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+//	iounmap(dev_priv->mm.gtt->gtt);
+	teardown_scratch_page(dev);
+	if (INTEL_INFO(dev)->gen < 6)
+		intel_gmch_remove();
+	kfree(dev_priv->mm.gtt);
+}
+
+#endif