%% NASA battery degradation data 
% This script loads impedance data from the NASA open repository and
% performs manipulations for visualization and analysis. B. Saha and K.
% Goebel (2007). "Battery Data Set", NASA Ames Prognostics Data Repository
% (http://ti.arc.nasa.gov/project/prognostic-data-repository), NASA Ames
% Research Center, Moffett Field, CA
% http://ti.arc.nasa.gov/tech/dash/pcoe/prognostic-data-repository/

%% Load the data
clear
% Folder that holds the NASA .mat files. The variable is deliberately not
% named "dir" so that the built-in DIR function is not shadowed for the
% rest of the session.
dataDir = 'C:\Work\Projects\Aging\dataAnalytics\Sundar';
% fullfile inserts the correct file separator for the current platform.
myData = load(fullfile(dataDir,'B0006.mat'));
% load returns a struct with one field per variable stored in the file.
% Use the first one (assumes the battery record is the first/only variable
% in the MAT-file - TODO confirm if other files are used).
loadedVars = fieldnames(myData);
fieldName = loadedVars{1};
% Create a structure with the data of interest
myData = myData.(fieldName);

%% Create a table with the data for easier access
% Number of cycle records stored in the structure (one table row each).
n = size(myData.cycle,2);
% Build the n-by-8 table of empty cells and label its columns in a single
% call; the cells are filled in by the loop that follows.
t = cell2table(cell(n,8),'VariableNames',{...
    'Cycle','Type','AmbientTemp','Capacity','Re','Rct','Time','TestData'});

%% Populate the table with the contents of the structure.
% Each cycle record contributes one row: its index, type, ambient
% temperature, start time, the scalar summaries (Capacity, Re, Rct) and a
% nested table with the raw test data for that cycle.
for i = 1:n
    % Shorthand for the current cycle record and its measurement struct.
    thisCycle = myData.cycle(i);
    meas = thisCycle.data;
    
    % Fields common to every cycle type.
    t.Cycle{i} = i;
    t.Type{i} = thisCycle.type;
    t.AmbientTemp{i} = thisCycle.ambient_temperature;
    t.Time{i} = thisCycle.time;
    
    % Scalar summaries default to NaN; each case below overrides only the
    % quantities that its cycle type actually provides. Having a value in
    % every row keeps the later cell2mat conversions row-aligned.
    t.Capacity{i} = NaN;
    t.Re{i} = NaN;
    t.Rct{i} = NaN;
    
    % Deal with different test data based on the cycle type.
    switch(t.Type{i})
        
        case('charge')
            % Collect the Test Data into a table within the main table.
            t.TestData{i} = array2table([...
                meas.Time',...
                meas.Voltage_measured',...
                meas.Current_measured',...
                meas.Temperature_measured',...
                meas.Current_charge',...
                meas.Voltage_charge'],...
                'VariableNames',...
                {'Time','VoltageMeas','CurrentMeas',...
                'TempMeas','CurrentCharge','VoltageCharge'});
            
        case('discharge')
            % Collect the Test Data into a table within the main table.
            t.TestData{i} = array2table([...
                meas.Time',...
                meas.Voltage_measured',...
                meas.Current_measured',...
                meas.Temperature_measured',...
                meas.Current_load',...
                meas.Voltage_load'],...
                'VariableNames',...
                {'Time','Voltage_measured','Current_measured',...
                'Temperature_measured','Current_load','Voltage_load'});
            
            % Capacity may be missing or empty; only store it when there
            % is a value, otherwise the NaN default stays in place.
            if isfield(meas,'Capacity') && ~isempty(meas.Capacity)
                t.Capacity{i} = meas.Capacity;
            end
            
        case('impedance')
            % "Rectified_Impedance" appears to be shorter than the rest of
            % the test data, so pad it with NaN's up to the length of
            % "Battery_impedance" before placing both in one array.
            nRect = length(meas.Rectified_Impedance);
            nBatt = length(meas.Battery_impedance);
            
            RectImp_pad = [...
                meas.Rectified_Impedance;...
                NaN(nBatt-nRect,1)];
            
            % Collect the Test Data into a table within the main table.
            t.TestData{i} = array2table([...
                meas.Sense_current',...
                meas.Battery_current',...
                meas.Current_ratio',...
                meas.Battery_impedance,...
                RectImp_pad],...
                'VariableNames',...
                {'Sense_current','Battery_current','Current_ratio',...
                'Battery_impedance','Rectified_Impedance'});
            
            t.Re{i} = meas.Re;
            t.Rct{i} = meas.Rct;
            
        otherwise
            % Unknown cycle type: keep the NaN summaries and leave TestData
            % empty instead of silently producing a misaligned table.
            warning('batteryData:unknownCycleType',...
                'Cycle %d has unrecognized type "%s"; test data skipped.',...
                i,t.Type{i});
    end
end
%% General housekeeping and clean up.
% The cycle type becomes a categorical so it can later be compared against
% labels such as 'impedance'.
t.Type = categorical(t.Type);
% The remaining summary columns hold one numeric value (or one row vector,
% for Time) per cell; unpack each of them into a plain numeric array.
numericVars = {'Cycle','AmbientTemp','Capacity','Rct','Re','Time'};
for v = 1:numel(numericVars)
    t.(numericVars{v}) = cell2mat(t.(numericVars{v}));
end

%% Timetable conversion
% t.Time holds one numeric row per cycle (treated here as a date vector).
% datetime converts all rows in one call, so no per-row loop or
% preallocation is needed, and the row count no longer depends on length()
% of a matrix.
t.newTime = datetime(t.Time);
% Rename the numeric time column by name rather than by column position.
isTimeVar = strcmp(t.Properties.VariableNames,'Time');
t.Properties.VariableNames(isTimeVar) = {'oldTime'};
% Use the datetime column as row times, then drop the numeric copy.
tt = table2timetable(t,'RowTimes','newTime');
tt.oldTime = [];

%% Visualization
% Capacity is NaN for every row that did not record it; flag those rows.
idxCap = isnan(tt.Capacity);
% Plot capacity against cycle number for the rows that do have a value.
f1 = figure(1);
cyclesWithCap = tt.Cycle(~idxCap);
capValues = tt.Capacity(~idxCap);
plot(cyclesWithCap,capValues,'.')
ylabel('capacity (Ah)')
xlabel('cycle')

%% Impedance
% Logical mask of the rows that hold an impedance (Z) measurement.
idxZ = tt.Type == 'impedance';
% Row numbers of those measurements, i.e. the cycle # of each Z experiment.
idx2 = find(idxZ);
% Gather the rectified impedance of every Z experiment, one column each.
% The matrix is allocated up front so it does not grow inside the loop and
% so that columns from an earlier run of this section cannot linger.
% All experiments are expected to have the same number of points.
if isempty(idx2)
    aux = [];
else
    aux = zeros(height(tt.TestData{idx2(1)}),length(idx2));
end
for k=1:length(idx2)
    aux(:,k) = tt.TestData{idx2(k),1}.Rectified_Impedance;
%     aux(:,k) = tt.TestData{idx2(k),1}.Battery_impedance;
end
% Plot real part against negative imaginary part for every 40th experiment.
f2 = figure(2); clf(f2); hold on
for k=1:40:length(idx2)
    plot(real(aux(:,k)),-imag(aux(:,k)),'.','LineStyle','none');
end
% axis equal

%% Rct and Re
% Scatter both resistances against the cycle number of each impedance
% measurement (rows listed in idx2).
f3 = figure(3);
clf(f3)
hold on
zCycles = tt.Cycle(idx2);
scatter(zCycles,tt.Rct(idx2),'.')
scatter(zCycles,tt.Re(idx2),'.','r')
legend({'Rct' 'Re'})
xlabel('cycle')
ylabel('Rct & Re (\Omega)')

%% Looking for impedance signature of capacity fade
% Capacity is only recorded at discharge, so plotting capacity vs. Rct or
% Re requires pairing each impedance measurement with a capacity value.
% idx4 lists the impedance experiments (rows of tt) that are immediately
% followed by a discharge measurement, i.e. by a capacity measurement.

% An impedance measurement in the very last row has no following row;
% exclude it so that the look-ahead below cannot index past the table.
hasNext = idx2 < height(tt);
candidates = idx2(hasNext);
% Keep the candidates whose next row is a discharge cycle.
idx4 = candidates(tt.Type(candidates + 1) == 'discharge');

% Capacity vs. (Rct and Re)
% The x-axis is the capacity lost relative to the first paired discharge:
% capacity after the first impedance test minus capacity after each test.
f4 = figure(4); clf(f4); hold on
capFade = tt.Capacity(idx4(1)+1) - tt.Capacity(idx4+1);
scatter(capFade,tt.Rct(idx4),'.')
scatter(capFade,tt.Re(idx4),'.','r')
ylabel('Rct(\Omega blue)   Re(\Omega red)')
xlabel('Capacity fade (initial - final) (Ah)')

%% Relative change in Rct and Re
% Ratio of the first paired measurement to each later one; the first
% element of both vectors is therefore 1.
Rct_norm = tt.Rct(idx4(1))./tt.Rct(idx4);
Re_norm = tt.Re(idx4(1))./tt.Re(idx4);
f5 = figure(5); clf(f5); hold on
scatter(Re_norm,Rct_norm,'.')
xlabel('Re_0/Re')
ylabel('Rct_0/Rct')
% Use one lower limit for both axes, taken over BOTH ratios, so that no
% Re points are clipped and the identity line stays on the diagonal of the
% square axes. Limits must be increasing, so skip them when no ratio is
% below 1 (or all are NaN).
lowerLim = min([Re_norm(:); Rct_norm(:)]);
if lowerLim < 1
    xlim([lowerLim 1])
    ylim([lowerLim 1])
end
axis square
% Identity line: points on it have the same relative change in Re and Rct.
line([0 1],[0 1])

%% Input data preparation
% Define predictor and objective. Must be an array.
% Columns are selected by name so the result does not depend on the
% position of the variables in the timetable.
x = tt{idx4,{'Re','Rct'}}; % column 1 = Re, column 2 = Rct
y = tt.Capacity(idx4+1);   % Capacity of the discharge that follows

%% Define training and testing
% Split between training and testing using cvpartition. The number of
% observations is the number of rows of x (length(x) would return the
% number of columns when there are fewer than two rows).
testFraction = 0.2;
dataPartition = cvpartition(size(x,1),'HoldOut',testFraction);
Xtrain = x(dataPartition.training,:); % Re and Rct
Ytrain = y(dataPartition.training,:); % Capacity
Xtest = x(dataPartition.test,:); % Re and Rct
Ytest = y(dataPartition.test,:); % Capacity

%% Visualize predicted and predictors
% Capacity against each predictor: circles use the first column of X,
% squares the second; black = training set, red = test set.
% f10 keeps the figure handle (it is no longer overwritten by the handle
% of the first plotted line), and a single "hold on" covers all four plots.
f10 = figure(10); clf(f10); hold on
plot(Xtrain(:,1),Ytrain,'ok','MarkerFaceColor','k')
plot(Xtest(:,1),Ytest,'or','MarkerFaceColor','r')
plot(Xtrain(:,2),Ytrain,'sk','MarkerFaceColor','k')
plot(Xtest(:,2),Ytest,'sr','MarkerFaceColor','r')
xlabel('Re Rct /\Omega')
ylabel('capacity /Ah')
legend({'Re train' 'Re test' 'Rct train' 'Rct test'})

%% Parametric modeling - Linear model
% Fit a linear regression of Ytrain on the columns of Xtrain, then predict
% the response for the held-out predictors.
mdl_lm = fitlm(Xtrain,Ytrain);
pred_lm = predict(mdl_lm,Xtest);

% plotPrediction is a helper defined outside this script; it is given the
% predictions, the measured test values, the number 9 and a title string.
% NOTE(review): the 9 is presumably the figure number - confirm in helper.
f9 = plotPrediction(pred_lm,Ytest,9,'Linear Model');

%% Nonparametric modeling - Decision tree
% Fit a regression tree with default settings; the trailing comment keeps
% the hyperparameter-optimization option at hand but disabled.
mdl_dt = fitrtree(Xtrain,Ytrain);%,'OptimizeHyperparameters','auto');
pred_dt = predict(mdl_dt,Xtest);

% plotPrediction is a helper defined outside this script.
% NOTE(review): the 8 is presumably the figure number - confirm in helper.
f8 = plotPrediction(pred_dt,Ytest,8,'Decision Tree');

%% Nonparametric modeling - Neural Network
% Function-fitting network created with a hidden layer size of 20.
trainFcn = 'trainlm';  % Levenberg-Marquardt training algorithm
hiddenLayerSize = 20;
net = fitnet(hiddenLayerSize,trainFcn);

% Train the Network
% Predictors and targets are transposed before being passed to train, and
% the network output is transposed back wherever it is compared with Y.
mdl_net = train(net,Xtrain',Ytrain','UseParallel','no');

% Report the error on the training set.
% NOTE(review): mape is not defined in this script. The result is
% multiplied by 100 here, which assumes mape returns a fraction and takes
% the measured values as first argument - confirm against the mape that is
% actually on the path.
fprintf('Neural Net Training set MAPE: %0.2f%%\n', ...
    mape(Ytrain, mdl_net(Xtrain')')*100);

% make predictions on the test data set and plot results
Y_nn = mdl_net(Xtest')';

% plotPrediction is a helper defined outside this script.
% NOTE(review): the 6 is presumably the figure number - confirm in helper.
f6 = plotPrediction(Y_nn,Ytest,6,'Neural Network');

%% X - Y plot for comparison between fit methods and measurement
% Predicted against measured capacity for the three models, together with
% the identity line on which a perfect prediction would fall.
f7 = figure(7); clf(f7)
% Common span for the identity line and both axes: 10% beyond the range
% of the measured test capacities.
capLims = [0.9*min(Ytest) 1.1*max(Ytest)];
scatter(Ytest,pred_lm, 'MarkerFaceColor', 'b'); hold on;
scatter(Ytest,pred_dt, 'MarkerFaceColor', 'k');
scatter(Ytest,Y_nn, 'MarkerFaceColor', 'r');
line(capLims,capLims)
xlabel('measured capacity (Ah)')
ylabel('predicted capacity (Ah)')
legend({'Linear' 'Decision Tree' 'Neural Network'},'location','best')
xlim(capLims)
ylim(capLims)
axis equal

%% Classification
% A less stringent predictive method for battery capacity is to classify
% it in categories such as excellent (1), good (2), poor (3). This
% requirement lends itself to classification techniques that can be
% explored using the Classification Learner App.
% The reference capacity is the one recorded right after the first paired
% impedance measurement.
nominalCapacity = tt.Capacity(idx4(1)+1);
capRatio = tt.Capacity/nominalCapacity;
% Group 0 marks rows without a capacity value (NaN fails every test below).
tt.Group = zeros(height(tt),1);
% The intervals are half-open so that ratios of exactly 0.9 or 0.7 are
% assigned to a category instead of being left in group 0.
tt.Group(capRatio >= 0.9) = 1;
tt.Group(capRatio < 0.9 & capRatio >= 0.7) = 2;
tt.Group(capRatio < 0.7) = 3;
% Category of the discharge that follows each paired impedance test,
% selected by variable name rather than by column position.
z = tt.Group(idx4+1); % Categorical Capacity
classificationTable = table(x(:,1),x(:,2),z,'VariableNames',{'Re' 'Rct' 'catCapacity'});