/*
 * Copyright 1998 by Kenneth C. Dyke
 *       All Rights Reserved
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted,
 * provided that the above copyright notice appears in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE.
 *
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#import <driverkit/IOConfigTable.h>
#import <driverkit/generalFuncs.h>
#import <driverkit/kernelDriver.h>
#import <kernserv/ppc/spl.h>
#import <mach/vm_param.h>
#import <stdlib.h>
#import <string.h>

#import "G3CacheEnabler.h"
#import "PowerPCInline.h"

// Parameter names understood by this driver.
//
// The Cache* keys below are looked up in the device's config table by
// -initFromDeviceDescription: and can also be read and written at run time
// through the get/set{Int,Char}Values:forParameter:count: methods.  Keys
// handled by the Char methods carry "YES"/"NO" strings; keys handled by the
// Int methods carry a single unsigned value.
static const char CacheEnableKey[] = "CacheEnable";
static const char CacheParityEnableKey[] = "CacheParityEnable";
static const char CacheClockRatioKey[] = "CacheClockRatio";
static const char CacheRAMSizeKey[] = "CacheRAMSize";
static const char CacheRAMTypeKey[] = "CacheRAMType";
static const char CacheControlKey[] = "CacheControl";
static const char CacheWriteThroughKey[] = "CacheWriteThrough";
static const char CacheOutputHoldKey[] = "CacheOutputHold";
static const char CacheDLLSlowKey[] = "CacheDLLSlow";
static const char CacheDifferentialClockKey[] = "CacheDifferentialClock";
static const char CacheDLLBypassKey[] = "CacheDLLBypass";
// Run-time only parameters; none of these is read from the config table.
// CPUJunctionTemperature, CPUHz and BusHz are only answered by
// -getIntValues:...; InstructionForwardingInterval can be both read and set.
static const char CPUJunctionTemperatureKey[] = "CPUJunctionTemperature";
static const char CPUHzKey[] = "CPUHz";
static const char BusHzKey[] = "BusHz";
static const char InstructionForwardingIntervalKey[] = "InstructionForwardingInterval";

// Core-to-bus clock ratio table, indexed by the PLL configuration value read
// from HID1 (hid.field.pll in -_calcCPUSpeed); the binary pattern in each
// comment is the index of that entry.
//
// Every entry is TWICE the multiplier named in its comment (15 for 7.5x, 6 for
// 3x, ...) so that half-step ratios can be stored as integers.  The caller
// compensates by computing busHz = (cpuHz * 2) / busDividers[pll].
//
// Reserved, bypass and PLL-off encodings are all given the value 2, i.e. they
// are treated as 1x; this also means no entry can produce a division by zero.
static const int busDividers[16] =
{
    2,				// 0000 - Reserved
    15,				// 0001 - 7.5x
    14,				// 0010 - 7x
    2,				// 0011 - PLL off/bypass, 1x
    2,				// 0100 - Reserved
    13,				// 0101 - 6.5x
    2,				// 0110 - Reserved
    9,				// 0111 - 4.5x
    6,				// 1000 - 3x
    11,				// 1001 - 5.5x
    8,				// 1010 - 4x
    10,				// 1011 - 5x
    16,				// 1100 - 8x
    12,				// 1101 - 6x
    7,				// 1110 - 3.5x
    2,				// 1111 - PLL off
};

// Returns YES only when str is exactly the NUL-terminated string "YES"
// (case-sensitive, no surrounding whitespace); anything else yields NO.
// str must not be NULL.
static BOOL isYES(const char *str)
{
    if(strcmp(str, "YES") != 0)
        return NO;

    return YES;
}

@implementation G3CacheEnabler

// Disable cache, but don't modify our known settings.
//
// Does nothing if our cached copy of L2CR (_control) already says the L2 is
// disabled.  Otherwise, with interrupts and data address translation turned
// off, it reads and then dcbf-flushes every 32-byte line in the low
// 1MB + 32K of memory, clears the L2 enable bit, invalidates the L2, and
// finally writes L2CR as all zeroes.  On return _control has l2enable,
// l2dataonly and l2invalidate cleared; every other field of _control is left
// as it was so the cache can be re-enabled with the same settings.

// Note: This method does really scary stuff, and will most likely not work
// at all if you try to compile this file without optimization turned on.  Do
// not futz around with this code unless you really know what you are doing.
// In particular, the statement order below is significant, and everything
// used between the two MSR[DR] writes must live in registers.
- (void)_disableCache
{
    // These are declared 'register' because they are used while data
    // translation is off and must not be spilled to the stack.
    register unsigned long addr, i;
    register HID0 hid0_save, hid0;
    register L2CR l2disable;
    register MSR msr_save, msr;    
    L2CR temp;
    
    // Nothing to do if we believe the cache is already off.
    if(_control.field.l2enable == 0)
        return;

    // Just use the lower 1MB + 32K of memory to flush cache.  We don't modify this memory
    // so this should be safe.  If there is no memory there, then I have no idea how we reached
    // this point in the first place.
    // NOTE(review): 1MB + 32K presumably corresponds to the largest L2 plus
    // the L1 data cache size - confirm against the CPU manual.
    
    // Turn off interrupts.
    msr_save = readMSR();
    msr = msr_save;
    msr.field.ee = 0;
    writeMSR(msr);
    
    // Get HID0.
    hid0 = hid0_save = readHID0();

    // Turn on bit to assist with flush.
    hid0.field.dcfa = 1;
    writeHID0(hid0);

    // Disable instruction cache accesses to the L2.  This
    // may not be necessary, but it seems like it could screw
    // up the LRU bits in the L2 cache if an instruction miss
    // below causes the L2 cache to load something up.
    _control.field.l2dataonly = 1;
    writeL2CR(_control);

    // Save a copy of the settings we'll use to disable the cache.
    // This is taken now (into a register) because _control is an instance
    // variable in memory and cannot be read once translation is off.
    l2disable = _control;
    l2disable.field.l2enable = 0;

    // Really scary... turn off address translation for data
    // accesses.  From here until we turn it back on, we better
    // not touch any memory!  This means that compiling without
    // optimization will most likely prevent this code from working
    // at all, since access to our local stack will not work.  Thankfully
    // PowerPC's have plenty of registers.
    msr.field.dr = 0;
    writeMSR(msr);
    
    // First pass: read one byte from every 32-byte line in the region so
    // the lines are pulled through the L1 and L2 caches.
    for(i = 0, addr = 0; i < (1024*1024+32768)/32; i++)
    {
        *((volatile unsigned char *)addr);
        addr += 32;
    }
    // Second pass: walk the same region again and flush each line (dcbf).
    for(i = 0, addr = 0; i < (1024*1024+32768)/32; i++)
    {
        __asm volatile("dcbf 0,%0" : /* No outputs */ : "r" (addr));
        addr += 32;
    }
    // Wait for any pending writes to complete.
    __asm volatile("sync");

    // Turn off L2 cache now.
    writeL2CR(l2disable);
    
    // Restore data address translation
    msr.field.dr = 1;
    writeMSR(msr);

    // Restore HID0 settings.  This clears the DCFA bit.
    writeHID0(hid0_save);

    // Restore interrupts.
    writeMSR(msr_save);

    // Set state flags.
    _control.field.l2enable = 0;
    _control.field.l2dataonly = 0;

    // Invalidate the cache, just to be sure.
    _control.field.l2invalidate = 1;
    writeL2CR(_control);
    
    // Wait for invalidate to finish...
    temp = readL2CR();
    while(temp.field.l2invalidateinprogress)
        temp = readL2CR();

    // Fix flag so the invalidate request is not replayed the next time
    // _control is written to the register.
    _control.field.l2invalidate = 0;

    // Shut it down completely: the hardware register is written as zero,
    // while the configuration is still remembered in _control.
    temp.value = 0;
    writeL2CR(temp);

    IOLog("L2 Cache Disabled.\n");
}

// Enable the L2 cache using the settings held in _control.
//
// Does nothing if _control already says the cache is enabled.  Otherwise the
// sequence is: zero L2CR, write the configuration, request a global
// invalidate and poll until it has finished, then set the enable bit (both in
// _control and in the register).  The statement order is significant; do not
// reorder the register writes.
- (void)_enableCache
{
    L2CR temp;

    // Already on as far as we know.
    if(_control.field.l2enable == 1)
        return;
    
    // First, shut it down nicely.  Don't mess with other bits
    // in case the clocks need to remain stable until the rest of
    // the CPU is finished with the cache.
    // NOTE(review): despite the remark above, this write clears every bit of
    // L2CR, not just the enable bit - confirm that this is intended.
    temp.value = 0;
    writeL2CR(temp);

    // Make copy so we can play with some bits.
    temp = _control;

    // First, turn it on.
    // (The enable bit in this copy is not set - see the guard at the top -
    // so this presumably programs the configuration while the cache itself
    // stays disabled.)
    writeL2CR(temp);

    // Now invalidate it.
    temp.field.l2invalidate = 1;
    writeL2CR(temp);

    // Wait for invalidate to finish...
    temp = readL2CR();
    while(temp.field.l2invalidateinprogress)
        temp = readL2CR();

    // Enable cache.  Zoom! (or *Boom* if the settings are wrong!)
    // _control never had l2invalidate set, so this write also drops the
    // invalidate request issued above.
    _control.field.l2enable = 1;
    writeL2CR(_control);

    IOLog("L2 Cache Enabled.\n");
}

// Calculate a rough estimate of CPU speed in Hz.  This is primarily
// used to set up our TAU timer, and to show the clock speed to the
// user if that is desired.
//
// The estimate is made by counting processor cycles in PMC4 across a fixed
// busy loop and dividing by the wall-clock time the loop took.  Results are
// stored in three instance variables:
//   _cpuHz       - estimated core clock in Hz
//   _busHz       - bus clock derived from _cpuHz and the HID1 PLL setting
//   _tauInterval - number of CPU cycles in 20 microseconds
// If no time at all appears to have elapsed, all three are set to 0.
- (void)_calcCPUSpeed
{
    register MSR msr, msr_save;
    ns_time_t start, stop, elapsed;
    
    unsigned long i;
    PMC clocks;

    // This is volatile so that GCC doesn't optimize away the
    // whole loop.
    volatile unsigned long result;
    HID1 hid;
    MMCR1 mmcr;
    
    // Set up PMC4 to count CPU clock cycles and clear it.
    mmcr.value = 0;
    mmcr.field.pmc4select = 1;

    // Get start time (64-bit nanosecond timer).
    IOGetTimestamp(&start);

    // Turn off interrupts just so this is somewhat accurate.
    msr = msr_save = readMSR();
    msr.field.ee = 0;
    writeMSR(msr);

    writeMMCR1(mmcr);
    clocks.value = 0;
    writePMC4(clocks);
    
    // Do something that'll take a 'long' time.  Divides
    // are a nice slow operation, so do about a million
    // of them.  Hopefully the PMC4 register won't overflow.
    for(i = 0; i < 1024*1024; i++)
    {
        result = i / 1219587;
    }
    clocks = readPMC4();
    
    // Restore interrupts.
    writeMSR(msr_save);

    // Get stop time, then stop the performance counter.
    IOGetTimestamp(&stop);
    mmcr.value = 0;
    writeMMCR1(mmcr);

    // Calculate real elapsed time.
    elapsed = stop - start;

    // A zero interval would make the division below undefined.  It should
    // never happen, but report "unknown" rather than risk a fault in the
    // kernel if the timestamp source misbehaves.
    if(elapsed == 0)
    {
        _cpuHz = 0;
        _busHz = 0;
        _tauInterval = 0;
        return;
    }

    // Calculate number of clocks per second (elapsed is in nanoseconds).
    _cpuHz = ((unsigned long long)clocks.field.counter * 1000000000UL) / elapsed;

    // Calculate bus speed.  The table holds twice the clock multiplier, hence
    // the factor of two; the multiply is done in 64 bits so that it cannot
    // wrap whatever the width of _cpuHz is.
    hid = readHID1();
    _busHz = ((unsigned long long)_cpuHz * 2) / busDividers[hid.field.pll];
    
    // Calculate thermal interval value to use.  This should be the number
    // of processor cycles it takes for 20 microseconds to go by, i.e.
    // cpuHz * 20 / 1000000.  That is computed as cpuHz / 50000, which gives
    // exactly the same result but has no intermediate product: if _cpuHz is a
    // 32-bit quantity (its declaration is not in this file), cpuHz * 20
    // wraps for any clock above roughly 214 MHz.
    _tauInterval = _cpuHz / 50000;
}

// Estimates the CPU junction temperature and stores it in _temperature.
//
// The thermal unit can only answer "is the temperature above threshold T?",
// so the value is found by bisecting the threshold range 0..127: each round
// programs a threshold into THRM1, enables the unit through THRM3 with the
// sample interval in _tauInterval, waits for the result to become valid and
// then narrows the range.  A round that times out is simply retried; after
// more than 256 rounds in total the search gives up and reports 0.  The
// thermal unit is always switched off again before returning.
//
// It works well, but is perhaps not as accurate as it could be.  It's really
// only provided for amusement, anyway.
- (void)_calcTemperature
{
    long lowBound = 0, highBound = 127, probe, attempts = 0;
    ns_time_t began, now;

    THRM sensor;
    THRM3 timer;

    while((highBound - lowBound) > 1)
    {
        // Master watchdog: make sure we can't get stuck in here forever.
        if(attempts > 255)
        {
            lowBound = highBound = 0;
            break;
        }
        attempts++;

        // Next threshold to try is the midpoint of the remaining range.
        probe = (highBound + lowBound) / 2;

        // THRM1: valid threshold; TIN will read 1 if the temperature
        // exceeds it.
        sensor.value = 0;
        sensor.field.threshold = probe;
        sensor.field.v = 1;

        // THRM3: enable the thermal unit with our sample interval.
        timer.value = 0;
        timer.field.sitv = _tauInterval;
        timer.field.e = 1;

        writeTHRM1(sensor);
        writeTHRM3(timer);

        // Poll until TIV says TIN is valid, but for no more than 30
        // microseconds (30000 ns), which is well beyond the countdown
        // programmed above.  A timeout is deliberately not logged: it
        // happens a lot when the user has slowed instruction forwarding
        // right down, and logging would only make a starved CPU worse.
        IOGetTimestamp(&began);
        for(;;)
        {
            IOGetTimestamp(&now);
            if((now - began) > 30000)
                break;

            sensor = readTHRM1();
            if(sensor.field.tiv != 0)
                break;
        }

        // No valid reading: leave the bounds alone and go round again.
        if(sensor.field.tiv != 1)
            continue;

        if(sensor.field.tin == 1)
            lowBound = probe;      // Temperature exceeds threshold.
        else
            highBound = probe;     // Temperature is below threshold.
    }
    _temperature = (highBound + lowBound) / 2;

    // Shut down TAU.
    sensor.value = 0;
    timer.value = 0;
    writeTHRM1(sensor);
    writeTHRM3(timer);
}

// Programs the ICTC register from _ictcInterval.  An interval of zero writes
// the register as all zeroes; any other value is stored in the FI field with
// the enable bit set.
- (void)_setICTCInterval
{
    ICTC reg;

    reg.value = 0;
    if(_ictcInterval == 0)
    {
        writeICTC(reg);
        return;
    }

    reg.field.fi = _ictcInterval;
    reg.field.e = 1;
    writeICTC(reg);
}

// Looks up `key` in `configTable` and interprets its string value as a
// boolean: exactly "YES" gives YES, any other string gives NO.
//
// Returns YES and stores the result in *value when the key exists.  When the
// key is missing a message is logged, *value is left untouched and NO is
// returned.
- (BOOL)_getConfigBOOL:(IOConfigTable *)configTable forKey:(const char *)key value:(BOOL *)value
{
    const char *entry = [configTable valueForStringKey:key];

    if(entry == NULL)
    {
        IOLog("%s: Can't find config table entry for %s\n",[self name],key);
        return NO;
    }

    *value = isYES(entry);

    // The string is owned by the config table and must be handed back.
    [configTable freeString:entry];
    return YES;
}

// Looks up `key` in `configTable` and converts its string value to an int
// with strtol() in base 0, so decimal, 0x-prefixed hex and 0-prefixed octal
// are all accepted.
//
// Returns YES and stores the result in *value when the key exists.  When the
// key is missing a message is logged, *value is left untouched and NO is
// returned.  The conversion itself is not validated: text that is not a
// number silently becomes 0.
- (BOOL)_getConfigInt:(IOConfigTable *)configTable forKey:(const char *)key value:(int *)value
{
    long parsed;
    const char *entry = [configTable valueForStringKey:key];

    if(entry == NULL)
    {
        IOLog("%s: Can't find config table entry for %s\n",[self name],key);
        return NO;
    }

    // This should probably be a bit more robust.
    parsed = strtol(entry, NULL, 0);

    // The string is owned by the config table and must be handed back.
    [configTable freeString:entry];

    *value = (int)parsed;
    return YES;
}

// Enable Full-Power DPM: read-modify-write of HID0 that sets the DPM bit and
// leaves every other HID0 bit as it was.
- (void)_enableDPM
{
    HID0 reg = readHID0();

    reg.field.dpm = 1;
    writeHID0(reg);
}

// Disable Full-Power DPM: read-modify-write of HID0 that clears the DPM bit
// and leaves every other HID0 bit as it was.
- (void)_disableDPM
{
    HID0 reg = readHID0();

    reg.field.dpm = 0;
    writeHID0(reg);
}

// Standard DriverKit IODevice methods start here

// DriverKit probe entry point.  Refuses to load unless the processor version
// register reports version 8 (a 750); otherwise it creates and initializes
// the driver instance and reports whether that succeeded.
+ (BOOL)probe:(id)deviceDescription
{
    id instance;
    PVR pvr = readPVR();

    // Make sure we're on a 750.
    if(pvr.field.version != 8)
        return NO;

    instance = [[self alloc] initFromDeviceDescription:deviceDescription];
    return (instance != nil);
}

// Designated initializer.  Reads every cache setting from the device's config
// table into _control, turns the L2 cache on if the configuration asks for
// it, enables DPM, measures the CPU and bus speed, and finally registers the
// device with DriverKit under the name "G3CacheEnabler", unit 0.
//
// Every cache key is mandatory: if any one is missing the instance is freed
// and nil is returned (the lookup helper has already logged which key).
// Returns nil as well when the superclass initializer fails.
- (id)initFromDeviceDescription:(id)deviceDescription
{
    IOConfigTable *table;
    BOOL enable, parity, control, writeThrough, dllSlow, differential, dllBypass;
    int clockRatio, ramSize, ramType, outputHold;

    self = [super initFromDeviceDescription:deviceDescription];
    if(self == nil)
        return nil;

    table = [deviceDescription configTable];

    // Find all of the config values we care about.  The lookups run in this
    // order and stop at the first key that is missing.
    if(![self _getConfigBOOL:table forKey:CacheEnableKey value:&enable]
       || ![self _getConfigBOOL:table forKey:CacheParityEnableKey value:&parity]
       || ![self _getConfigInt:table forKey:CacheClockRatioKey value:&clockRatio]
       || ![self _getConfigInt:table forKey:CacheRAMSizeKey value:&ramSize]
       || ![self _getConfigInt:table forKey:CacheRAMTypeKey value:&ramType]
       || ![self _getConfigBOOL:table forKey:CacheControlKey value:&control]
       || ![self _getConfigBOOL:table forKey:CacheWriteThroughKey value:&writeThrough]
       || ![self _getConfigInt:table forKey:CacheOutputHoldKey value:&outputHold]
       || ![self _getConfigBOOL:table forKey:CacheDLLSlowKey value:&dllSlow]
       || ![self _getConfigBOOL:table forKey:CacheDifferentialClockKey value:&differential]
       || ![self _getConfigBOOL:table forKey:CacheDLLBypassKey value:&dllBypass])
    {
        [self free];
        return nil;
    }

    // Everything was present; build our copy of the L2 control register.
    _control.field.l2enable = enable;
    _control.field.l2parity = parity;
    _control.field.l2clock = clockRatio;
    _control.field.l2size = ramSize;
    _control.field.l2ram = ramType;
    _control.field.l2control = control;
    _control.field.l2writethrough = writeThrough;
    _control.field.l2outputhold = outputHold;
    _control.field.l2slow = dllSlow;
    _control.field.l2differential = differential;
    _control.field.l2bypass = dllBypass;

    // Whew.  Okay.. if it's supposed to be turned on, do it now.
    if(_control.field.l2enable)
    {
        // Hack: -_enableCache returns early when the flag is already set,
        // so clear it first and let that method set it again.
        _control.field.l2enable = 0;
        [self _enableCache];
    }

    // Enable DPM since it doesn't hurt anything.
    [self _enableDPM];

    // Calculate CPU and bus speed once so we have it for later.
    [self _calcCPUSpeed];

    // Register ourselves with DriverKit since everything went okay.
    [self setName:"G3CacheEnabler"];
    [self setUnit:0];
    [self registerDevice];

    return self;
}

// Returns the current value of one of this driver's integer parameters.
//
// parameterArray - receives the single value
// parameterName  - one of the integer keys defined at the top of this file
// count          - in: capacity of parameterArray; out: set to 1 on success
//
// The cache settings are reported from _control (our copy of L2CR), not read
// back from the hardware.  Asking for CPUJunctionTemperature triggers a fresh
// measurement.  Names we do not recognize are passed on to the superclass.
// Returns IO_R_IO if parameterArray has no room for a value.
- (IOReturn)getIntValues: (unsigned *)parameterArray
            forParameter: (IOParameterName)parameterName
                   count: (unsigned *)count
{
    const unsigned capacity = *count;
    unsigned result;

    if(strcmp(parameterName, CacheClockRatioKey) == 0)
        result = _control.field.l2clock;
    else if(strcmp(parameterName, CacheRAMSizeKey) == 0)
        result = _control.field.l2size;
    else if(strcmp(parameterName, CacheRAMTypeKey) == 0)
        result = _control.field.l2ram;
    else if(strcmp(parameterName, CacheOutputHoldKey) == 0)
        result = _control.field.l2outputhold;
    else if(strcmp(parameterName, CPUJunctionTemperatureKey) == 0)
    {
        // Measured on demand, every time it is asked for.
        [self _calcTemperature];
        result = _temperature;
    }
    else if(strcmp(parameterName, CPUHzKey) == 0)
        result = _cpuHz;
    else if(strcmp(parameterName, BusHzKey) == 0)
        result = _busHz;
    else if(strcmp(parameterName, InstructionForwardingIntervalKey) == 0)
        result = _ictcInterval;
    else
        return [super getIntValues:parameterArray
                      forParameter:parameterName
                             count:count];

    // Paranoia. Make sure there is actually some room to return a value.
    if(capacity == 0)
        return IO_R_IO;

    parameterArray[0] = result;
    *count = 1;

    return IO_R_SUCCESS;
}

// Returns one of this driver's boolean parameters as the string "YES" or
// "NO".
//
// parameterArray - receives the NUL-terminated string
// parameterName  - one of the boolean Cache* keys
// count          - in: capacity of parameterArray in bytes;
//                  out: number of bytes written, including the terminator
//
// The values are reported from _control (our copy of L2CR), not read back
// from the hardware.  The string is truncated if the buffer is too small but
// is always NUL-terminated.  Names we do not recognize are passed on to the
// superclass.  Returns IO_R_IO if the buffer has no room at all, the same
// answer -getIntValues:forParameter:count: gives in that situation.
- (IOReturn)getCharValues: (unsigned char *)parameterArray
             forParameter: (IOParameterName)parameterName
                    count: (unsigned *)count
{
    const char *param;
    unsigned int length;
    unsigned int maxCount = *count;

    if(!strcmp(parameterName,CacheEnableKey))
    {
        param = _control.field.l2enable ? "YES" : "NO";
    }
    else if (!strcmp(parameterName,CacheParityEnableKey))
    {
        param = _control.field.l2parity ? "YES" : "NO";
    }
    else if(!strcmp(parameterName,CacheControlKey))
    {
        param = _control.field.l2control ? "YES" : "NO";
    }
    else if(!strcmp(parameterName,CacheWriteThroughKey))
    {
        param = _control.field.l2writethrough ? "YES" : "NO";
    }
    else if(!strcmp(parameterName,CacheDLLSlowKey))
    {
        param = _control.field.l2slow ? "YES" : "NO";
    }
    else if(!strcmp(parameterName,CacheDLLBypassKey))
    {
        param = _control.field.l2bypass ? "YES" : "NO";
    }
    else if(!strcmp(parameterName,CacheDifferentialClockKey))
    {
        param = _control.field.l2differential ? "YES" : "NO";
    }
    else
    {
        return [super getCharValues:parameterArray
                       forParameter:parameterName
                              count:count];
    }

    // A zero-length buffer cannot even hold the terminator.  This must be
    // rejected before the truncation below: maxCount is unsigned, so
    // "maxCount - 1" would wrap to a huge value and both the copy and the
    // terminator store would run far outside the caller's buffer.
    if(maxCount < 1)
        return IO_R_IO;

    length = strlen(param);
    if(length >= maxCount)
    {
        // Leave room for the terminator.
        length = maxCount - 1;
    }
    *count = length + 1;
    strncpy((char *)parameterArray, param, length);
    parameterArray[length] = '\0';
    return IO_R_SUCCESS;
}

// Changes one of this driver's integer parameters.
//
// parameterArray - holds the new value in its first element
// parameterName  - one of the writable integer keys
// count          - number of valid elements in parameterArray
//
// Note: Changes to cache parameters do not take effect until
// the cache is turned off and back on; they are only recorded in _control.
// The instruction forwarding interval, by contrast, is applied immediately.
//
// Returns IO_R_IO if no value was supplied, IO_R_UNSUPPORTED if the value is
// one of the encodings this driver refuses or does not fit in the register
// field it is destined for, and IO_R_SUCCESS otherwise.  Names we do not
// recognize are passed on to the superclass untouched.
- (IOReturn)setIntValues: (unsigned *)parameterArray
            forParameter: (IOParameterName)parameterName
                   count: (unsigned)count
{
    BOOL isClockRatio = (strcmp(parameterName,CacheClockRatioKey) == 0);
    BOOL isRAMSize = (strcmp(parameterName,CacheRAMSizeKey) == 0);
    BOOL isRAMType = (strcmp(parameterName,CacheRAMTypeKey) == 0);
    BOOL isOutputHold = (strcmp(parameterName,CacheOutputHoldKey) == 0);
    BOOL isInterval = (strcmp(parameterName,InstructionForwardingIntervalKey) == 0);
    unsigned param;
    L2CR candidate;
    ICTC ictc;

    if(!isClockRatio && !isRAMSize && !isRAMType && !isOutputHold && !isInterval)
    {
        return [super setIntValues:parameterArray
                      forParameter:parameterName
                             count:count];
    }

    // Don't read a value the caller never supplied.
    if(count < 1)
        return IO_R_IO;

    param = *parameterArray;

    // Each value is first stored into a scratch copy of the register and
    // read back.  A value too large for its bitfield would otherwise be
    // silently truncated on assignment, which could turn it into a
    // different encoding - including one of the encodings rejected below.
    candidate = _control;

    if(isClockRatio)
    {
        candidate.field.l2clock = param;
        if((unsigned)candidate.field.l2clock != param)
            return IO_R_UNSUPPORTED;

        // Paranoia.
        if((param == 3) || (param == 7))
            return IO_R_UNSUPPORTED;
        
        _control.field.l2clock = param;
    }
    else if(isRAMSize)
    {
        candidate.field.l2size = param;
        if((unsigned)candidate.field.l2size != param)
            return IO_R_UNSUPPORTED;

        // Paranoia
        if(param == 0)
            return IO_R_UNSUPPORTED;
        
        _control.field.l2size = param;
    }
    else if(isRAMType)
    {
        candidate.field.l2ram = param;
        if((unsigned)candidate.field.l2ram != param)
            return IO_R_UNSUPPORTED;

        // Paranoia
        if(param == 1)
            return IO_R_UNSUPPORTED;
        
        _control.field.l2ram = param;
    }
    else if(isOutputHold)
    {
        // Paranoia
        if(param >= 2)
            return IO_R_UNSUPPORTED;
        
        _control.field.l2outputhold = param;
    }
    else
    {
        // Instruction forwarding interval.  Refuse a value the register
        // field cannot hold; otherwise the interval we remember (and report
        // back to callers) would differ from what the hardware is using.
        ictc.value = 0;
        ictc.field.fi = param;
        if((unsigned)ictc.field.fi != param)
            return IO_R_UNSUPPORTED;

        _ictcInterval = param;
        [self _setICTCInterval];
    }
    return IO_R_SUCCESS;
}

// Changes one of this driver's boolean parameters.
//
// parameterArray - the new value as text; "YES" means on, anything else off
// parameterName  - one of the boolean Cache* keys
// count          - number of valid bytes in parameterArray
//
// Setting CacheEnable turns the L2 cache on or off immediately.  Every other
// key only updates _control and takes effect the next time the cache is
// turned off and back on.  Names we do not recognize are passed on to the
// superclass untouched.
- (IOReturn)setCharValues: (unsigned char *)parameterArray
             forParameter: (IOParameterName)parameterName
                    count: (unsigned)count
{
    // Room for the four bytes "YES\0" could occupy, plus our own terminator.
    char value[sizeof("YES") + 1];
    unsigned int length, i;
    const char *param = value;

    // Only `count` bytes of parameterArray are valid and nothing guarantees
    // they end with a NUL, so comparing the caller's buffer directly could
    // read past the end of the data.  Copy just enough of it to tell "YES"
    // from everything else and terminate the copy ourselves: for any
    // properly terminated input the result is the same as before.
    length = (count < sizeof(value) - 1) ? count : sizeof(value) - 1;
    for(i = 0; i < length; i++)
        value[i] = (char)parameterArray[i];
    value[length] = '\0';

    if(!strcmp(parameterName,CacheEnableKey))
    {
        if(isYES(param))
        {
            [self _enableCache];
        }
        else
        {
            [self _disableCache];
        }
    }
    else if (!strcmp(parameterName,CacheParityEnableKey))
    {
        _control.field.l2parity = isYES(param);
    }
    else if(!strcmp(parameterName,CacheControlKey))
    {
        _control.field.l2control = isYES(param);
    }
    else if(!strcmp(parameterName,CacheWriteThroughKey))
    {
        _control.field.l2writethrough = isYES(param);
    }
    else if(!strcmp(parameterName,CacheDLLSlowKey))
    {
        _control.field.l2slow = isYES(param);
    }
    else if(!strcmp(parameterName,CacheDLLBypassKey))
    {
        _control.field.l2bypass = isYES(param);
    }
    else if(!strcmp(parameterName,CacheDifferentialClockKey))
    {
        _control.field.l2differential = isYES(param);
    }
    else
    {
        return [super setCharValues:parameterArray
                       forParameter:parameterName
                              count:count];
    }
    return IO_R_SUCCESS;
}

@end
