Skip to content

AttributeError: 'Pipeline' object has no attribute 'name' #17

@Gaurav7296

Description

@Gaurav7296

I tried serializing the pipeline below.

If I remove the `init` argument from `serialize_to_bundle`,
the error is:
"AttributeError: 'OutletTypeEncoder' object has no attribute 'op'"

importing required libraries

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
import category_encoders as ce
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
#from sklearn.preprocessing import StandardScaler, MinMaxScaler, Imputer, Binarizer, PolynomialFeatures
from sklearn.pipeline import Pipeline
import mleap.sklearn.pipeline
import mleap.sklearn.feature_union
import mleap.sklearn.base
import mleap.sklearn.logistic
import mleap.sklearn.preprocessing

read the training data set

data = pd.read_csv('market.csv')

top rows of the data

#print(data.head(5))

separate the independent and target variables

train_x = data.drop(columns=['Item_Outlet_Sales'])
train_y = data['Item_Outlet_Sales']

import the BaseEstimator

from sklearn.base import BaseEstimator

define the class OutletTypeEncoder

This will be our custom transformer that will create 3 new binary columns

custom transformer must have methods fit and transform

class OutletTypeEncoder(BaseEstimator):
    """Custom transformer that adds three binary indicator columns.

    The added columns are:

    * ``outlet_grocery_store`` -- 1 where ``Outlet_Type == 'Grocery Store'``
    * ``outlet_supermarket_3`` -- 1 where ``Outlet_Type == 'Supermarket Type3'``
    * ``outlet_identifier_OUT027`` -- 1 where ``Outlet_Identifier == 'OUT027'``

    The transformer is stateless: ``fit`` learns nothing.
    """

    def __init__(self):
        pass

    def fit(self, documents, y=None):
        """Return ``self`` unchanged; there is nothing to learn.

        Both ``documents`` and ``y`` are accepted only so the signature
        matches what a pipeline passes to ``fit``; neither is read.
        """
        return self

    def transform(self, x_dataset):
        """Return a copy of ``x_dataset`` with the indicator columns added.

        ``x_dataset`` must provide the ``Outlet_Type`` and
        ``Outlet_Identifier`` columns; a missing column raises ``KeyError``.
        """
        # Work on a copy so the caller's frame is not modified as a side
        # effect of calling transform (the previous version wrote the new
        # columns straight into the input).
        encoded = x_dataset.copy()

        # Multiplying the boolean mask by 1 yields 0/1 integers.
        encoded['outlet_grocery_store'] = (encoded['Outlet_Type'] == 'Grocery Store') * 1
        encoded['outlet_supermarket_3'] = (encoded['Outlet_Type'] == 'Supermarket Type3') * 1
        encoded['outlet_identifier_OUT027'] = (encoded['Outlet_Identifier'] == 'OUT027') * 1

        return encoded

pre-processing step

Drop the columns -

Impute the missing values in column Item_Weight by mean

Scale the data in the column Item_MRP

# Column-wise pre-processing. Columns not named in any transformer below
# are passed through unchanged (remainder='passthrough').
pre_process = ColumnTransformer(
    remainder='passthrough',
    transformers=[
        # Columns removed from the feature matrix. 'Outlet_Identifier' was
        # previously listed twice; the redundant entry has been removed.
        ('drop_columns', 'drop', [
            'Item_Identifier',
            'Outlet_Identifier',
            'Item_Fat_Content',
            'Item_Type',
            'Outlet_Size',
            'Outlet_Location_Type',
            'Outlet_Type',
        ]),
        # Replace missing Item_Weight values with the column mean.
        ('impute_item_weight', SimpleImputer(strategy='mean'), ['Item_Weight']),
        # Scale Item_MRP with StandardScaler.
        ('scale_data', StandardScaler(), ['Item_MRP']),
    ],
)

Define the Pipeline

"""
Step1: get the outlet binary columns
Step2: pre processing
Step3: Train a Random Forest Model
"""
# Full modelling pipeline; each step is a (name, estimator) pair and the
# steps run in the order listed.
model_pipeline = Pipeline(
    steps=[
        # Adds the outlet indicator columns.
        ('get_outlet_binary_columns', OutletTypeEncoder()),
        # Drops, imputes and scales columns via the ColumnTransformer.
        ('pre_processing', pre_process),
        # Final estimator; random_state is fixed for reproducible fits.
        ('random_forest', RandomForestRegressor(max_depth=10, random_state=2)),
    ],
)

fit the pipeline with the training data

model_pipeline.fit(train_x,train_y)

read the test data

test_data = pd.read_csv('test.csv')

predict target variables on the test data

#model_pipeline.predict(test_data)

Serialize the random forest model

model_pipeline.serialize_to_bundle('/tmp', 'market.rf', init=True)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions