JustPaste.it

%python
import pyspark.pandas as pd
from sklearn.model_selection import train_test_split

# List the wine-quality dataset directory; the listing is not stored.
dbutils.fs.ls("dbfs:/databricks-datasets/wine-quality/")

# Load the two semicolon-delimited CSV files (white first, then red).
white_wine = pd.read_csv(
    "dbfs:/databricks-datasets/wine-quality/winequality-white.csv",
    sep=";",
)
red_wine = pd.read_csv(
    "dbfs:/databricks-datasets/wine-quality/winequality-red.csv",
    sep=";",
)

# Flag every row with its wine colour so the frames can be combined.
red_wine["is_red"] = 1
white_wine["is_red"] = 0

# Stack the red rows on top of the white rows, then replace spaces in the
# column names with underscores.
data = pd.concat([red_wine, white_wine], axis=0)
data.rename(columns=lambda name: name.replace(" ", "_"), inplace=True)
data.head()

# Turn "quality" into a binary label: 1 when the score is >= 7, else 0.
high_quality = (data["quality"] >= 7).astype(int)
data["quality"] = high_quality

# Split with a fixed seed, then separate the features from the label column.
train, test = train_test_split(data, random_state=123)
X_train = train.drop(columns=["quality"])
X_test = test.drop(columns=["quality"])
y_train = train["quality"]
y_test = test["quality"]
 
import mlflow
import mlflow.pyfunc
import mlflow.sklearn
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from mlflow.models.signature import infer_signature
 
class SklearnModelWrapper(mlflow.pyfunc.PythonModel):
    """Pyfunc adapter whose ``predict`` returns column 1 of ``predict_proba``.

    The wrapped object only needs a ``predict_proba`` method whose result
    supports ``[:, 1]`` indexing.
    """

    def __init__(self, model):
        """Store ``model`` on the instance as ``self.model``."""
        self.model = model

    def predict(self, context, model_input):
        """Return the second ``predict_proba`` column for ``model_input``.

        ``context`` is accepted to match the pyfunc ``predict`` signature but
        is not used here.
        """
        # NOTE(review): for a binary classifier with labels 0/1 this column
        # is presumably the probability of class 1 -- confirm for the model
        # that is actually wrapped.
        probabilities = self.model.predict_proba(model_input)
        return probabilities[:, 1]
 
# Read the API token from the current notebook context.
_notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
token = _notebook_context.apiToken().get()

# Write a [DEFAULT] profile with the host and that token to
# /root/.databrickscfg on the local filesystem, overwriting any existing file.
# NOTE(review): the token is stored in plaintext in that file.
dbutils.fs.put(
    "file:///root/.databrickscfg",
    "[DEFAULT]\nhost=https://community.cloud.databricks.com\ntoken = " + token,
    overwrite=True,
)