author:    kettenis <kettenis@openbsd.org>
           Mon, 12 May 2014 19:29:16 +0000 (19:29 +0000)
committer: kettenis <kettenis@openbsd.org>
           Mon, 12 May 2014 19:29:16 +0000 (19:29 +0000)

Move GTT management for Sandy Bridge and up into inteldrm(4). This makes
it possible to use the non-mappable part of the GTT, prepares the way for
using the PPGTT and reduces the diffs with Linux.

ok jsg@

sys/dev/pci/agp_i810.c
sys/dev/pci/drm/i915/i915_drv.c
sys/dev/pci/drm/i915/i915_drv.h
sys/dev/pci/drm/i915/i915_gem.c
sys/dev/pci/drm/i915/i915_gem_gtt.c

index 94a60eb..8ba5968 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: agp_i810.c,v 1.85 2014/03/26 14:41:41 mpi Exp $       */
+/*     $OpenBSD: agp_i810.c,v 1.86 2014/05/12 19:29:16 kettenis Exp $  */
 
 /*-
  * Copyright (c) 2000 Doug Rabson
 /* Memory is snooped, must not be accessed through gtt from the cpu. */
 #define        INTEL_COHERENT  0x6     
 
-#define GEN6_PTE_UNCACHED              (1 << 1)
-#define HSW_PTE_UNCACHED               (0)
-#define GEN6_PTE_CACHE_LLC             (2 << 1)
-#define GEN6_PTE_CACHE_LLC_MLC         (3 << 1)
-
 enum {
        CHIP_NONE       = 0,    /* not integrated graphics */
        CHIP_I810       = 1,    /* i810/i815 */
@@ -73,9 +68,6 @@ enum {
        CHIP_G4X        = 7,    /* G4X */
        CHIP_PINEVIEW   = 8,    /* Pineview/Pineview M */
        CHIP_IRONLAKE   = 9,    /* Clarkdale/Arrandale */
-       CHIP_SANDYBRIDGE=10,    /* Sandybridge */
-       CHIP_IVYBRIDGE  =11,    /* Ivybridge */
-       CHIP_HASWELL    =12,    /* Haswell */
 };
 
 struct agp_i810_softc {
@@ -201,84 +193,6 @@ agp_i810_get_chiptype(struct pci_attach_args *pa)
        case PCI_PRODUCT_INTEL_ARRANDALE_IGD:
                return (CHIP_IRONLAKE);
                break;
-       case PCI_PRODUCT_INTEL_CORE2G_GT1:
-       case PCI_PRODUCT_INTEL_CORE2G_M_GT1:
-       case PCI_PRODUCT_INTEL_CORE2G_S_GT:
-       case PCI_PRODUCT_INTEL_CORE2G_GT2:
-       case PCI_PRODUCT_INTEL_CORE2G_M_GT2:
-       case PCI_PRODUCT_INTEL_CORE2G_GT2_PLUS:
-       case PCI_PRODUCT_INTEL_CORE2G_M_GT2_PLUS:
-               return (CHIP_SANDYBRIDGE);
-               break;
-       case PCI_PRODUCT_INTEL_CORE3G_D_GT1:
-       case PCI_PRODUCT_INTEL_CORE3G_M_GT1:
-       case PCI_PRODUCT_INTEL_CORE3G_S_GT1:
-       case PCI_PRODUCT_INTEL_CORE3G_D_GT2:
-       case PCI_PRODUCT_INTEL_CORE3G_M_GT2:
-       case PCI_PRODUCT_INTEL_CORE3G_S_GT2:
-               return (CHIP_IVYBRIDGE);
-       case PCI_PRODUCT_INTEL_CORE4G_D_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_D_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_S_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_S_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_S_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_M_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_M_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_M_GT2_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT1_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT2_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT3_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT1_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT2_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_GT3_2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_SDV_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_S_SDV_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_M_SDV_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT1_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT2_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT3_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT1_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT2_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_SDV_GT3_2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_ULT_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_S_ULT_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_M_ULT_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT1_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT2_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT1_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT2_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_ULT_GT3_2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_D_CRW_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_S_CRW_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT1:
-       case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT2:
-       case PCI_PRODUCT_INTEL_CORE4G_M_CRW_GT3:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT1_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT2_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT3_1:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT1_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT2_2:
-       case PCI_PRODUCT_INTEL_CORE4G_R_CRW_GT3_2:
-               return (CHIP_HASWELL);
-               break;
        }
        
        return (CHIP_NONE);
@@ -336,9 +250,6 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux)
        case CHIP_I965:
        case CHIP_G4X:
        case CHIP_IRONLAKE:
-       case CHIP_SANDYBRIDGE:
-       case CHIP_IVYBRIDGE:
-       case CHIP_HASWELL:
                gmaddr = AGP_I965_GMADR;
                mmaddr = AGP_I965_MMADR;
                memtype = PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_64BIT;
@@ -576,19 +487,6 @@ agp_i810_attach(struct device *parent, struct device *self, void *aux)
                gatt->ag_physical = READ4(AGP_I810_PGTBL_CTL) & ~1;
                break;
 
-       case CHIP_SANDYBRIDGE:
-       case CHIP_IVYBRIDGE:
-       case CHIP_HASWELL:
-               /*
-                * Even though stolen memory exists on these machines,
-                * it isn't necessarily mapped into the aperture.
-                */
-               isc->stolen = 0;
-
-               /* GATT address is already in there, make sure it's enabled */
-               gatt->ag_physical = READ4(AGP_I810_PGTBL_CTL) & ~1;
-               break;
-
        default:
                printf(": unknown initialisation\n");
                return;
@@ -669,34 +567,13 @@ void
 agp_i810_bind_page(void *sc, bus_addr_t offset, paddr_t physical, int flags)
 {
        struct agp_i810_softc *isc = sc;
+
        /*
         * COHERENT mappings mean set the snoop bit. this should never be
         * accessed by the gpu through the gtt.
         */
-       switch (isc->chiptype) {
-       case CHIP_SANDYBRIDGE:
-       case CHIP_IVYBRIDGE:
-               if (flags & BUS_DMA_GTT_NOCACHE)
-                       physical |= GEN6_PTE_UNCACHED;
-               if (flags & BUS_DMA_GTT_CACHE_LLC)
-                       physical |= GEN6_PTE_CACHE_LLC;
-               if (flags & BUS_DMA_GTT_CACHE_LLC_MLC)
-                       physical |= GEN6_PTE_CACHE_LLC_MLC;
-               break;
-       case CHIP_HASWELL:
-               if (flags & BUS_DMA_GTT_NOCACHE)
-                       physical |= HSW_PTE_UNCACHED;
-               if (flags & BUS_DMA_GTT_CACHE_LLC)
-                       physical |= GEN6_PTE_CACHE_LLC;
-               /* Haswell doesn't set L3 this way */
-               if (flags & BUS_DMA_GTT_CACHE_LLC_MLC)
-                       physical |= GEN6_PTE_CACHE_LLC;
-               break;
-       default:
-               if (flags & BUS_DMA_COHERENT)
-                       physical |= INTEL_COHERENT;
-               break;
-       }
+       if (flags & BUS_DMA_COHERENT)
+               physical |= INTEL_COHERENT;
 
        intagp_write_gtt(isc, offset - isc->isc_apaddr, physical);
 }
@@ -923,12 +800,6 @@ intagp_write_gtt(struct agp_i810_softc *isc, bus_size_t off, paddr_t v)
                case CHIP_IRONLAKE:
                        pte |= (v & 0x0000000f00000000ULL) >> 28;
                        break;
-               /* gen6+ can do 40 bit addressing */
-               case CHIP_SANDYBRIDGE:
-               case CHIP_IVYBRIDGE:
-               case CHIP_HASWELL:
-                       pte |= (v & 0x000000ff00000000ULL) >> 28;
-                       break;
                }
        }
 
@@ -947,9 +818,6 @@ intagp_write_gtt(struct agp_i810_softc *isc, bus_size_t off, paddr_t v)
                break;
        case CHIP_G4X:
        case CHIP_IRONLAKE:
-       case CHIP_SANDYBRIDGE:
-       case CHIP_IVYBRIDGE:
-       case CHIP_HASWELL:
                baseoff = AGP_G4X_GTT;
                break;
        default:
index ab8089e..73447be 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_drv.c,v 1.65 2014/03/16 03:34:32 jsg Exp $ */
+/* $OpenBSD: i915_drv.c,v 1.66 2014/05/12 19:29:16 kettenis Exp $ */
 /*
  * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
  *
@@ -872,6 +872,7 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
        const struct drm_pcidev *id_entry;
        int                      i;
        uint16_t                 pci_device;
+       uint32_t                 aperture_size;
 
        id_entry = drm_find_description(PCI_VENDOR(pa->pa_id),
            PCI_PRODUCT(pa->pa_id), inteldrm_pciidlist);
@@ -886,6 +887,9 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
 
        printf("\n");
 
+       if (dev_priv->info->gen >= 6)
+               inteldrm_driver.flags &= ~(DRIVER_AGP | DRIVER_AGP_REQUIRE);
+
        /* All intel chipsets need to be treated as agp, so just pass one */
        dev_priv->drmdev = drm_attach_pci(&inteldrm_driver, pa, 1, 1, self);
 
@@ -926,6 +930,8 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
                return;
        }
 
+       i915_gem_gtt_init(dev);
+
        intel_irq_init(dev);
 
        /*
@@ -991,6 +997,9 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
                return;
        }
 
+       aperture_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
+       dev_priv->mm.gtt_base_addr = dev_priv->mm.gtt->gma_bus_addr;
+
        intel_pm_init(dev);
        intel_gt_sanitize(dev);
        intel_gt_init(dev);
@@ -1000,23 +1009,24 @@ inteldrm_attach(struct device *parent, struct device *self, void *aux)
        intel_setup_gmbus(dev_priv);
 
        /* XXX would be a lot nicer to get agp info before now */
-       uvm_page_physload(atop(dev->agp->base), atop(dev->agp->base +
-           dev->agp->info.ai_aperture_size), atop(dev->agp->base),
-           atop(dev->agp->base + dev->agp->info.ai_aperture_size),
+       uvm_page_physload(atop(dev_priv->mm.gtt_base_addr),
+           atop(dev_priv->mm.gtt_base_addr + aperture_size),
+           atop(dev_priv->mm.gtt_base_addr),
+           atop(dev_priv->mm.gtt_base_addr + aperture_size),
            PHYSLOAD_DEVICE);
        /* array of vm pages that physload introduced. */
-       dev_priv->pgs = PHYS_TO_VM_PAGE(dev->agp->base);
+       dev_priv->pgs = PHYS_TO_VM_PAGE(dev_priv->mm.gtt_base_addr);
        KASSERT(dev_priv->pgs != NULL);
        /*
         * XXX mark all pages write combining so user mmaps get the right
         * bits. We really need a proper MI api for doing this, but for now
         * this allows us to use PAT where available.
         */
-       for (i = 0; i < atop(dev->agp->info.ai_aperture_size); i++)
+       for (i = 0; i < atop(aperture_size); i++)
                atomic_setbits_int(&(dev_priv->pgs[i].pg_flags), PG_PMAP_WC);
-       if (agp_init_map(dev_priv->bst, dev->agp->base,
-           dev->agp->info.ai_aperture_size, BUS_SPACE_MAP_LINEAR |
-           BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->agph))
+       if (agp_init_map(dev_priv->bst, dev_priv->mm.gtt_base_addr,
+           aperture_size, BUS_SPACE_MAP_LINEAR | BUS_SPACE_MAP_PREFETCHABLE,
+           &dev_priv->agph))
                panic("can't map aperture");
 
        /* XXX */
index acb34f0..60537ee 100644 (file)
@@ -1,4 +1,4 @@
-/* $OpenBSD: i915_drv.h,v 1.51 2014/03/25 17:44:39 mpi Exp $ */
+/* $OpenBSD: i915_drv.h,v 1.52 2014/05/12 19:29:16 kettenis Exp $ */
 /* i915_drv.h -- Private header for the I915 driver -*- linux-c -*-
  */
 /*
 
 #include "acpi.h"
 
+struct intel_gtt {
+       /* Size of memory reserved for graphics by the BIOS */
+       unsigned int stolen_size;
+       /* Total number of gtt entries. */
+       unsigned int gtt_total_entries;
+       /* Part of the gtt that is mappable by the cpu, for those chips where
+        * this is not the full gtt. */
+       unsigned int gtt_mappable_entries;
+       /* Share the scratch page dma with ppgtts. */
+       bus_addr_t scratch_page_dma;
+       struct drm_dmamem *scratch_page;
+       /* for ppgtt PDE access */
+       bus_space_handle_t gtt;
+       /* needed for ioremap in drm/i915 */
+       bus_addr_t gma_bus_addr;
+};
+
 /* General customization:
  */
 
@@ -673,6 +690,8 @@ struct inteldrm_softc {
                unsigned long gtt_mappable_end;
                unsigned long gtt_end;
 
+               bus_addr_t gtt_base_addr;
+
                /**
                 * List of objects currently involved in rendering from the
                 * ringbuffer.
@@ -1338,6 +1357,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
                              unsigned long start,
                              unsigned long mappable_end,
                              unsigned long end);
+int i915_gem_gtt_init(struct drm_device *dev);
+void i915_gem_gtt_fini(struct drm_device *dev);
 
 /* modesetting */
 extern void intel_modeset_init_hw(struct drm_device *dev);
index 8b490dc..0e18aa2 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: i915_gem.c,v 1.72 2014/04/01 20:16:50 kettenis Exp $  */
+/*     $OpenBSD: i915_gem.c,v 1.73 2014/05/12 19:29:16 kettenis Exp $  */
 /*
  * Copyright (c) 2008-2009 Owain G. Ainsworth <oga@openbsd.org>
  *
@@ -1498,7 +1498,7 @@ i915_gem_fault(struct drm_gem_object *gem_obj, struct uvm_faultinfo *ufi,
                if (pps[lcv] == PGO_DONTCARE)
                        continue;
 
-               paddr = dev->agp->base + obj->gtt_offset + offset;
+               paddr = dev_priv->mm.gtt_base_addr + obj->gtt_offset + offset;
 
                if (pmap_enter(ufi->orig_map->pmap, vaddr, paddr,
                    mapprot, PMAP_CANFAIL | mapprot) != 0) {
@@ -4157,11 +4157,12 @@ intel_enable_ppgtt(struct drm_device *dev)
 
 int i915_gem_init(struct drm_device *dev)
 {
+       struct drm_i915_private *dev_priv = dev->dev_private;
        unsigned long gtt_size, mappable_size;
        int ret;
 
-       gtt_size = dev->agp->info.ai_aperture_size;
-       mappable_size = dev->agp->info.ai_aperture_size;
+       gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
+       mappable_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
        DRM_LOCK();
 #ifdef notyet
index 4b634e7..a1c1a79 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: i915_gem_gtt.c,v 1.8 2013/12/11 20:31:43 kettenis Exp $       */
+/*     $OpenBSD: i915_gem_gtt.c,v 1.9 2014/05/12 19:29:16 kettenis Exp $       */
 /*
  * Copyright © 2010 Daniel Vetter
  *
@@ -31,7 +31,6 @@
 
 typedef uint32_t gtt_pte_t;
 
-#ifdef notyet
 /* PPGTT stuff */
 #define GEN6_GTT_ADDR_ENCODE(addr)     ((addr) | (((addr) >> 28) & 0xff0))
 
@@ -47,7 +46,7 @@ typedef uint32_t gtt_pte_t;
 #define GEN6_PTE_ADDR_ENCODE(addr)     GEN6_GTT_ADDR_ENCODE(addr)
 
 static inline gtt_pte_t pte_encode(struct drm_device *dev,
-                                  dma_addr_t addr,
+                                  bus_addr_t addr,
                                   enum i915_cache_level level)
 {
        gtt_pte_t pte = GEN6_PTE_VALID;
@@ -78,6 +77,8 @@ static inline gtt_pte_t pte_encode(struct drm_device *dev,
        return pte;
 }
 
+#ifdef notyet
+
 /* PPGTT support for Sandybdrige/Gen6 and later */
 static void i915_ppgtt_clear_range(struct i915_hw_ppgtt *ppgtt,
                                   unsigned first_entry,
@@ -373,7 +374,8 @@ static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
 }
 
 
-#if 0
+#ifdef __linux__
+
 static void i915_ggtt_clear_range(struct drm_device *dev,
                                 unsigned first_entry,
                                 unsigned num_entries)
@@ -399,20 +401,43 @@ static void i915_ggtt_clear_range(struct drm_device *dev,
                iowrite32(scratch_pte, &gtt_base[i]);
        readl(gtt_base);
 }
+
 #else
+
 static void i915_ggtt_clear_range(struct drm_device *dev,
                                 unsigned first_entry,
                                 unsigned num_entries)
 {
-       struct agp_softc *sc = dev->agp->agpdev;
-       bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT);
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       gtt_pte_t scratch_pte;
+       const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
        int i;
 
-       for (i = 0; i < num_entries; i++) {
-               sc->sc_methods->unbind_page(sc->sc_chipc, addr);
-               addr += PAGE_SIZE;
+       if (INTEL_INFO(dev)->gen < 6) {
+               struct agp_softc *sc = dev->agp->agpdev;
+               bus_addr_t addr = sc->sc_apaddr + (first_entry << PAGE_SHIFT);
+               int i;
+
+               for (i = 0; i < num_entries; i++) {
+                       sc->sc_methods->unbind_page(sc->sc_chipc, addr);
+                       addr += PAGE_SIZE;
+               }
+               return;
        }
+
+       if (WARN(num_entries > max_entries,
+                "First entry = %d; Num entries = %d (max=%d)\n",
+                first_entry, num_entries, max_entries))
+               num_entries = max_entries;
+
+       scratch_pte = pte_encode(dev, dev_priv->mm.gtt->scratch_page_dma, I915_CACHE_LLC);
+       for (i = 0; i < num_entries; i++)
+               bus_space_write_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+                   (i + first_entry) * sizeof(gtt_pte_t), scratch_pte);
+       bus_space_read_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+           first_entry * sizeof(gtt_pte_t));
 }
+
 #endif
 
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
@@ -447,7 +472,8 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
        return 0;
 }
 
-#ifdef notyet
+#ifdef __linux__
+
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
@@ -496,9 +522,60 @@ static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
        I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
        POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
+
+#else
+
+/*
+ * Binds an object into the global gtt with the specified cache level. The object
+ * will be accessible to the GPU via commands whose operands reference offsets
+ * within the global GTT as well as accessible by the GPU through the GMADR
+ * mapped BAR (dev_priv->mm.gtt->gtt).
+ */
+static void gen6_ggtt_bind_object(struct drm_i915_gem_object *obj,
+                                 enum i915_cache_level level)
+{
+       struct drm_device *dev = obj->base.dev;
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       const int first_entry = obj->gtt_space->start >> PAGE_SHIFT;
+       const int max_entries = dev_priv->mm.gtt->gtt_total_entries - first_entry;
+       int page_count = obj->base.size >> PAGE_SHIFT;
+       bus_addr_t addr;
+       int i;
+
+       for (i = 0; i < page_count; i++) {
+               struct vm_page *page = obj->pages[i];
+               addr = VM_PAGE_TO_PHYS(page);
+               bus_space_write_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+                   (i + first_entry) * sizeof(gtt_pte_t),
+                   pte_encode(dev, addr, level));
+       }
+
+       BUG_ON(i > max_entries);
+       BUG_ON(i != obj->base.size / PAGE_SIZE);
+
+       /* XXX: This serves as a posting read to make sure that the PTE has
+        * actually been updated. There is some concern that even though
+        * registers and PTEs are within the same BAR that they are potentially
+        * of NUMA access patterns. Therefore, even with the way we assume
+        * hardware should work, we must keep this posting read for paranoia.
+        */
+       if (i != 0)
+               WARN_ON(bus_space_read_4(dev_priv->bst, dev_priv->mm.gtt->gtt,
+                    (i + first_entry - 1) * sizeof(gtt_pte_t)) !=
+                    pte_encode(dev, addr, level));
+
+       /* This next bit makes the above posting read even more important. We
+        * want to flush the TLBs only after we're certain all the PTE updates
+        * have finished.
+        */
+       I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+       POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
+
 #endif
 
-#if 0
+#ifdef __linux__
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
                              enum i915_cache_level cache_level)
 {
@@ -515,40 +592,33 @@ void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
 
        obj->has_global_gtt_mapping = 1;
 }
+
 #else
+
 void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
                              enum i915_cache_level cache_level)
 {
        struct drm_device *dev = obj->base.dev;
-       unsigned int flags = (cache_level == I915_CACHE_NONE) ?
-               0 : BUS_DMA_COHERENT;
-       struct agp_softc *sc = dev->agp->agpdev;
-       bus_addr_t addr = sc->sc_apaddr + obj->gtt_space->start;
-       int page_count = obj->base.size >> PAGE_SHIFT;
-       int i;
-
-       switch (cache_level) {
-       case I915_CACHE_NONE:
-               flags |= BUS_DMA_GTT_NOCACHE;
-               break;
-       case I915_CACHE_LLC:
-               flags |= BUS_DMA_GTT_CACHE_LLC;
-               break;
-       case I915_CACHE_LLC_MLC:
-               flags |= BUS_DMA_GTT_CACHE_LLC_MLC;
-               break;
-       default:
-               BUG();
-       }
-
-       for (i = 0; i < page_count; i++) {
-               sc->sc_methods->bind_page(sc->sc_chipc, addr,
-                   VM_PAGE_TO_PHYS(obj->pages[i]), flags);
-               addr += PAGE_SIZE;
+       if (INTEL_INFO(dev)->gen < 6) {
+               unsigned int flags = (cache_level == I915_CACHE_NONE) ?
+                       0 : BUS_DMA_COHERENT;
+               struct agp_softc *sc = dev->agp->agpdev;
+               bus_addr_t addr = sc->sc_apaddr + obj->gtt_space->start;
+               int page_count = obj->base.size >> PAGE_SHIFT;
+               int i;
+
+               for (i = 0; i < page_count; i++) {
+                       sc->sc_methods->bind_page(sc->sc_chipc, addr,
+                           VM_PAGE_TO_PHYS(obj->pages[i]), flags);
+                       addr += PAGE_SIZE;
+               }
+       } else {
+               gen6_ggtt_bind_object(obj, cache_level);
        }
 
        obj->has_global_gtt_mapping = 1;
 }
+
 #endif
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
@@ -617,7 +687,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev,
        i915_ggtt_clear_range(dev, start / PAGE_SIZE, (end-start) / PAGE_SIZE);
 }
 
-#ifdef notyet
+#ifdef __linux__
+
 static int setup_scratch_page(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -654,6 +725,33 @@ static void teardown_scratch_page(struct drm_device *dev)
        __free_page(dev_priv->mm.gtt->scratch_page);
 }
 
+#else
+
+static int setup_scratch_page(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       struct drm_dmamem *page;
+
+       page = drm_dmamem_alloc(dev_priv->dmat, PAGE_SIZE, 0, 1, PAGE_SIZE,
+           BUS_DMA_NOCACHE, 0);
+       if (page == NULL)
+               return -ENOMEM;
+
+       dev_priv->mm.gtt->scratch_page = page;
+       dev_priv->mm.gtt->scratch_page_dma = page->segs[0].ds_addr;
+
+       return 0;
+}
+
+static void teardown_scratch_page(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+
+       drm_dmamem_free(dev_priv->dmat, dev_priv->mm.gtt->scratch_page);
+}
+
+#endif
+
 static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
 {
        snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
@@ -677,6 +775,8 @@ static inline unsigned int gen7_get_stolen_size(u16 snb_gmch_ctl)
        return stolen_decoder[snb_gmch_ctl] << 20;
 }
 
+#ifdef __linux__
+
 int i915_gem_gtt_init(struct drm_device *dev)
 {
        struct drm_i915_private *dev_priv = dev->dev_private;
@@ -777,4 +877,143 @@ void i915_gem_gtt_fini(struct drm_device *dev)
                intel_gmch_remove();
        kfree(dev_priv->mm.gtt);
 }
-#endif /* notyet */
+
+#else
+
+static void intel_gmch_remove(void) {};
+
+int i915_gem_gtt_init(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+       bus_addr_t gtt_bus_addr;
+       bus_size_t size;
+       u16 snb_gmch_ctl;
+       int ret;
+
+       /* On modern platforms we need not worry ourself with the legacy
+        * hostbridge query stuff. Skip it entirely
+        */
+       if (INTEL_INFO(dev)->gen < 6) {
+#if 0
+               ret = intel_gmch_probe(dev_priv->bridge_dev, dev->pdev, NULL);
+               if (!ret) {
+                       DRM_ERROR("failed to set up gmch\n");
+                       return -EIO;
+               }
+#endif
+
+               dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+               if (!dev_priv->mm.gtt) {
+                       DRM_ERROR("Failed to initialize GTT\n");
+                       intel_gmch_remove();
+                       return -ENODEV;
+               }
+               dev_priv->mm.gtt->gtt_mappable_entries =
+                   dev->agp->info.ai_aperture_size >> PAGE_SHIFT;
+               dev_priv->mm.gtt->gtt_total_entries =
+                   dev_priv->mm.gtt->gtt_mappable_entries;
+               dev_priv->mm.gtt->gma_bus_addr = dev->agp->base;
+               return 0;
+       }
+
+       dev_priv->mm.gtt = kzalloc(sizeof(*dev_priv->mm.gtt), GFP_KERNEL);
+       if (!dev_priv->mm.gtt)
+               return -ENOMEM;
+
+#if 0
+       if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
+               pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
+#endif
+
+#ifdef CONFIG_INTEL_IOMMU
+       dev_priv->mm.gtt->needs_dmar = 1;
+#endif
+
+       /* For GEN6+ the PTEs for the ggtt live at 2MB + BAR0 */
+       ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x10,
+           PCI_MAPREG_MEM_TYPE_64BIT, &gtt_bus_addr, NULL, NULL);
+       if (ret)
+               goto err_out;
+       gtt_bus_addr += (2<<20);
+       ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18,
+           PCI_MAPREG_MEM_TYPE_64BIT, &dev_priv->mm.gtt->gma_bus_addr,
+           NULL, NULL);
+       if (ret)
+               goto err_out;
+
+       /* i9xx_setup */
+       snb_gmch_ctl = pci_conf_read(dev_priv->pc, dev_priv->tag, SNB_GMCH_CTRL);
+       dev_priv->mm.gtt->gtt_total_entries =
+               gen6_get_total_gtt_size(snb_gmch_ctl) / sizeof(gtt_pte_t);
+       if (INTEL_INFO(dev)->gen < 7)
+               dev_priv->mm.gtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);
+       else
+               dev_priv->mm.gtt->stolen_size = gen7_get_stolen_size(snb_gmch_ctl);
+
+       ret = -pci_mapreg_info(dev_priv->pc, dev_priv->tag, 0x18,
+           PCI_MAPREG_MEM_TYPE_64BIT, NULL, &size, NULL);
+       if (ret)
+               goto err_out;
+       dev_priv->mm.gtt->gtt_mappable_entries = size >> PAGE_SHIFT;
+       /* 64/512MB is the current min/max we actually know of, but this is just a
+        * coarse sanity check.
+        */
+       if ((dev_priv->mm.gtt->gtt_mappable_entries >> 8) < 64 ||
+           dev_priv->mm.gtt->gtt_mappable_entries > dev_priv->mm.gtt->gtt_total_entries) {
+               DRM_ERROR("Unknown GMADR entries (%d)\n",
+                         dev_priv->mm.gtt->gtt_mappable_entries);
+               ret = -ENXIO;
+               goto err_out;
+       }
+
+       ret = setup_scratch_page(dev);
+       if (ret) {
+               DRM_ERROR("Scratch setup failed\n");
+               goto err_out;
+       }
+
+#if 0
+       if (bus_space_map(dev_priv->bst, gtt_bus_addr,
+           dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t),
+           BUS_SPACE_MAP_PREFETCHABLE, &dev_priv->mm.gtt->gtt)) {
+               DRM_ERROR("Failed to map the gtt page table\n");
+               teardown_scratch_page(dev);
+               ret = -ENOMEM;
+               goto err_out;
+       }
+#else
+       if (bus_space_subregion(dev_priv->bst, dev_priv->regs->bsh, (2<<20),
+           dev_priv->mm.gtt->gtt_total_entries * sizeof(gtt_pte_t),
+           &dev_priv->mm.gtt->gtt)) {
+               DRM_ERROR("Failed to map the gtt page table %d\n", ret);
+               teardown_scratch_page(dev);
+               ret = -ENOMEM;
+               goto err_out;
+       }
+#endif
+
+       /* GMADR is the PCI aperture used by SW to access tiled GFX surfaces in a linear fashion. */
+       DRM_INFO("Memory usable by graphics device = %dM\n", dev_priv->mm.gtt->gtt_total_entries >> 8);
+       DRM_DEBUG_DRIVER("GMADR size = %dM\n", dev_priv->mm.gtt->gtt_mappable_entries >> 8);
+       DRM_DEBUG_DRIVER("GTT stolen size = %dM\n", dev_priv->mm.gtt->stolen_size >> 20);
+
+       return 0;
+
+err_out:
+       kfree(dev_priv->mm.gtt);
+       if (INTEL_INFO(dev)->gen < 6)
+               intel_gmch_remove();
+       return ret;
+}
+
+void i915_gem_gtt_fini(struct drm_device *dev)
+{
+       struct drm_i915_private *dev_priv = dev->dev_private;
+//     iounmap(dev_priv->mm.gtt->gtt);
+       teardown_scratch_page(dev);
+       if (INTEL_INFO(dev)->gen < 6)
+               intel_gmch_remove();
+       kfree(dev_priv->mm.gtt);
+}
+
+#endif