Unterschiede
Hier werden die Unterschiede zwischen zwei Versionen angezeigt.
| Beide Seiten der vorigen Revision Vorhergehende Überarbeitung Nächste Überarbeitung | Vorhergehende Überarbeitung | ||
| de:modul:m245:learningunits:lu02:loesungen:l02 [2026/01/05 13:32] – vdemir | de:modul:m245:learningunits:lu02:loesungen:l02 [2026/04/08 08:43] (aktuell) – [Modellvergleich] vdemir | ||
|---|---|---|---|
| Zeile 3: | Zeile 3: | ||
| ===== Voraussetzung ===== | ===== Voraussetzung ===== | ||
| - | | + | <code bash> |
| - | + | pip install pandas scikit-learn joblib | |
| + | </code> | ||
| ===== Python-Skript: | ===== Python-Skript: | ||
| + | <code python> | ||
| + | import pandas as pd | ||
| + | from sklearn.model_selection import train_test_split | ||
| + | from sklearn.pipeline import Pipeline | ||
| + | from sklearn.preprocessing import StandardScaler | ||
| + | from sklearn.linear_model import LogisticRegression | ||
| + | from sklearn.tree import DecisionTreeClassifier | ||
| + | from sklearn.metrics import accuracy_score, | ||
| + | import joblib | ||
| + | |||
| + | # ----------------------------- | ||
| + | # Daten laden | ||
| + | # ----------------------------- | ||
| + | data = pd.read_csv(" | ||
| + | X = data.drop(" | ||
| + | y = data[" | ||
| - | import pandas as pd | + | # ----------------------------- |
| - | from sklearn.model_selection import train_test_split | + | # Train / Test Split |
| - | from sklearn.pipeline import Pipeline | + | # ----------------------------- |
| - | from sklearn.preprocessing import StandardScaler | + | X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, |
| - | from sklearn.linear_model import LogisticRegression | + | |
| - | from sklearn.tree import DecisionTreeClassifier | + | # ----------------------------- |
| - | from sklearn.metrics import accuracy_score, | + | # Modell 1: Logistische Regression |
| - | import joblib | + | # ----------------------------- |
| - | # | + | log_reg_pipeline = Pipeline([ |
| - | # ----------------------------- | + | (" |
| - | # Daten laden | + | (" |
| - | # ----------------------------- | + | ]) |
| - | data = pd.read_csv(" | + | |
| - | # | + | log_reg_pipeline.fit(X_train, |
| - | X = data.drop(" | + | y_pred_lr = log_reg_pipeline.predict(X_test) |
| - | y = data[" | + | # |
| - | # | + | print(" |
| - | | + | print(" |
| - | # Train / Test Split | + | print(" |
| - | # ----------------------------- | + | print(" |
| - | X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, | + | |
| - | # | + | # ----------------------------- |
| - | # ----------------------------- | + | # Modell 2: Decision Tree |
| - | # Modell 1: Logistische Regression | + | # ----------------------------- |
| - | # ----------------------------- | + | tree_model = DecisionTreeClassifier(random_state=42) |
| - | log_reg_pipeline = Pipeline([ | + | tree_model.fit(X_train, |
| - | (" | + | y_pred_tree = tree_model.predict(X_test) |
| - | (" | + | |
| - | ]) | + | print(" |
| - | # | + | print(" |
| - | log_reg_pipeline.fit(X_train, | + | print(" |
| - | y_pred_lr = log_reg_pipeline.predict(X_test) | + | print(" |
| - | # | + | |
| - | print(" | + | # ----------------------------- |
| - | print(" | + | # Bestes Modell speichern |
| - | print(" | + | # ----------------------------- |
| - | print(" | + | joblib.dump(log_reg_pipeline, |
| - | # | + | |
| - | # ----------------------------- | + | # ----------------------------- |
| - | # Modell 2: Decision Tree | + | # Neue Vorhersage |
| - | # ----------------------------- | + | # ----------------------------- |
| - | tree_model = DecisionTreeClassifier(random_state=42) | + | new_customer = pd.DataFrame([{ |
| - | tree_model.fit(X_train, | + | " |
| - | y_pred_tree = tree_model.predict(X_test) | + | " |
| - | # | + | " |
| - | print(" | + | }]) |
| - | print(" | + | |
| - | print(" | + | loaded_model = joblib.load(" |
| - | print(" | + | prediction = loaded_model.predict(new_customer) |
| - | # | + | |
| - | # ----------------------------- | + | print(" |
| - | # Bestes Modell speichern | + | </ |
| - | # ----------------------------- | + | |
| - | joblib.dump(log_reg_pipeline, | + | |
| - | # | + | |
| - | # ----------------------------- | + | |
| - | # Neue Vorhersage | + | |
| - | # ----------------------------- | + | |
| - | new_customer = pd.DataFrame([{ | + | |
| - | " | + | |
| - | " | + | |
| - | " | + | |
| - | }]) | + | |
| - | # | + | |
| - | loaded_model = joblib.load(" | + | |
| - | prediction = loaded_model.predict(new_customer) | + | |
| - | # | + | |
| - | print(" | + | |
| | | ||
| - | + | ===== Modellvergleich ===== | |
| - | ===== 3. Modellvergleich ===== | + | |
| ^ Kriterium ^ Logistische Regression ^ Decision Tree ^ | ^ Kriterium ^ Logistische Regression ^ Decision Tree ^ | ||
| | Interpretierbarkeit | hoch | mittel | | | Interpretierbarkeit | hoch | mittel | | ||
| | Overfitting-Gefahr | gering | hoch | | | Overfitting-Gefahr | gering | hoch | | ||
| - | | Skalierung | noetig ja | nein | | + | | Skalierung | nötig ja | nein | |
| | Didaktisch | sinnvoll sehr | ja | | | Didaktisch | sinnvoll sehr | ja | | ||
| - | Fazit: | + | ===== Fazit ===== |
| - | Bei kleinen, sauberen | + | |
| - | Decision Trees sind anschaulich, | + | |
| + | |||
| + | ---- | ||
| + | [[https:// | ||
| | | ||