update abhaengigkeit
This commit is contained in:
@@ -45,7 +45,7 @@
|
|||||||
" Anzahl starker Bremsvorgänge mit hoher Verzögerung (>0,3 g). Modellierung z.B. als Poisson-verteilte Zufallsvariable mit geringem Mittelwert (wenige Ereignisse pro 100 km). Harte Bremsmanöver korrelieren mit aggressiver Fahrweise und sind prädiktiv für Fahrerwechsel. \n",
|
" Anzahl starker Bremsvorgänge mit hoher Verzögerung (>0,3 g). Modellierung z.B. als Poisson-verteilte Zufallsvariable mit geringem Mittelwert (wenige Ereignisse pro 100 km). Harte Bremsmanöver korrelieren mit aggressiver Fahrweise und sind prädiktiv für Fahrerwechsel. \n",
|
||||||
" → Quelle: [Tesla Safety Score Definition](https://www.findmyelectric.com/blog/tesla-safety-score-beta-explained)\n",
|
" → Quelle: [Tesla Safety Score Definition](https://www.findmyelectric.com/blog/tesla-safety-score-beta-explained)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"4. **Geschwindigkeitsüberschreitungen (ordinal):** \n",
|
"4. **Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien):** \n",
|
||||||
" Häufigkeit bzw. Ausmaß, mit dem Tempolimits überschritten werden. Kategorisierung z.B. in: \n",
|
" Häufigkeit bzw. Ausmaß, mit dem Tempolimits überschritten werden. Kategorisierung z.B. in: \n",
|
||||||
" - *selten*, \n",
|
" - *selten*, \n",
|
||||||
" - *manchmal*, \n",
|
" - *manchmal*, \n",
|
||||||
@@ -58,24 +58,26 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"#### Nicht erklärende Variablen:\n",
|
"#### Nicht erklärende Variablen:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"5. **Straßentyp (nominal, 3 Kategorien):** \n",
|
|
||||||
" - *Autobahn* (~30–33 %) \n",
|
|
||||||
" - *Außerorts* (~43 %) \n",
|
|
||||||
" - *Innerorts* (~24–27 %) \n",
|
|
||||||
" → Modelliert als Multinomialverteilung. Kontextvariable ohne direkte Prädiktionswirkung. \n",
|
|
||||||
" → Quelle: [Klimaschutzinstrumente im Verkehr](https://openumwelt.de/server/api/core/bitstreams/07bd23f9-ff0a-41e5-b89b-8d7baf0a5c36/content)\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"6. **Wetterbedingungen (nominal, 3 Kategorien):** \n",
|
"5. **Wetterbedingungen (nominal, 3 Kategorien):** \n",
|
||||||
" - *trocken* (~70–75 %) \n",
|
" - *trocken* (~70–75 %) \n",
|
||||||
" - *nass* (~20–25 %) \n",
|
" - *nass* (~20–25 %) \n",
|
||||||
" - *winterlich* (<5 %) \n",
|
" - *winterlich* (<5 %) \n",
|
||||||
" → Modelliert als Multinomialverteilung. Kontextvariable. \n",
|
" → Modelliert als Multinomialverteilung. Kontextvariable. \n",
|
||||||
" Die Einteilung basiert auf Daten des Statistischen Bundesamtes: Etwa 70–75 % aller Fahrten finden unter trockenen Bedingungen statt, 20–25 % bei Nässe und unter 5 % bei Schnee oder Glätte.\n",
|
" Die Einteilung basiert auf Daten des Statistischen Bundesamtes: Etwa 70–75 % aller Fahrten finden unter trockenen Bedingungen statt, 20–25 % bei Nässe und unter 5 % bei Schnee oder Glätte.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"7. **Fahrstrecke (metrisch):** \n",
|
"6. **Fahrstrecke (metrisch):** \n",
|
||||||
" Länge der Fahrt (z.B. lognormalverteilt um 12 km). Kontextvariable, nicht direkt erklärend. \n",
|
" Länge der Fahrt (z.B. lognormalverteilt um 12 km). Kontextvariable, nicht direkt erklärend. \n",
|
||||||
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n",
|
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
" \n",
|
||||||
|
"7. **Straßentyp (nominal, 3 Kategorien):** \n",
|
||||||
|
" - *Autobahn* (~30–33 %) \n",
|
||||||
|
" - *Außerorts* (~43 %) \n",
|
||||||
|
" - *Innerorts* (~24–27 %) \n",
|
||||||
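|
" → Modelliert als Multinomialverteilung, deren Kategorienwahrscheinlichkeiten von der Fahrstrecke abhängen: kurze Fahrten (< 5 km) überwiegend *Innerorts*, mittlere Fahrten (5–20 km) überwiegend *Außerorts*, lange Fahrten (≥ 20 km) überwiegend *Autobahn*. Kontextvariable ohne direkte Prädiktionswirkung. \n",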
|
||||||
|
" → Quelle: [Klimaschutzinstrumente im Verkehr](https://openumwelt.de/server/api/core/bitstreams/07bd23f9-ff0a-41e5-b89b-8d7baf0a5c36/content)\n",
|
||||||
|
"\n",
|
||||||
"8. **Wochentag (nominal, 2–3 Kategorien):** \n",
|
"8. **Wochentag (nominal, 2–3 Kategorien):** \n",
|
||||||
" Z.B. *Werktag* vs *Wochenende* oder *Mo–Fr*, *Sa*, *So*. Modellierung als kategorische Variable. Kontextvariable. \n",
|
" Z.B. *Werktag* vs *Wochenende* oder *Mo–Fr*, *Sa*, *So*. Modellierung als kategorische Variable. Kontextvariable. \n",
|
||||||
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n"
|
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n"
|
||||||
@@ -91,7 +93,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 50,
|
||||||
"id": "97ba29a9",
|
"id": "97ba29a9",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -102,6 +104,8 @@
|
|||||||
"import matplotlib.pyplot as plt\n",
|
"import matplotlib.pyplot as plt\n",
|
||||||
"import seaborn as sns\n",
|
"import seaborn as sns\n",
|
||||||
"from scipy.stats import norm, multinomial, poisson, lognorm, bernoulli\n",
|
"from scipy.stats import norm, multinomial, poisson, lognorm, bernoulli\n",
|
||||||
|
"import statsmodels.api as sm\n",
|
||||||
|
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
|
||||||
"\n",
|
"\n",
|
||||||
"np.random.seed(11)\n",
|
"np.random.seed(11)\n",
|
||||||
"N = 1_000_000"
|
"N = 1_000_000"
|
||||||
@@ -117,7 +121,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 51,
|
||||||
"id": "61247da5",
|
"id": "61247da5",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@@ -131,21 +135,40 @@
|
|||||||
"# 3. Harte Bremsmanöver (metrisch, Poisson)\n",
|
"# 3. Harte Bremsmanöver (metrisch, Poisson)\n",
|
||||||
"hard_brakes = np.random.poisson(lam=2, size=N)\n",
|
"hard_brakes = np.random.poisson(lam=2, size=N)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 4. Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien)\n",
|
"# 4. Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien) – Abhängigkeit von avg_speed\n",
|
||||||
"speeding = np.random.choice(['selten', 'manchmal', 'häufig'], size=N, p=[0.6, 0.3, 0.1])\n",
|
"def softmax(x):\n",
|
||||||
|
" e_x = np.exp(x - np.max(x, axis=1, keepdims=True))\n",
|
||||||
|
" return e_x / e_x.sum(axis=1, keepdims=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 5. Straßentyp (nominal, Kontext)\n",
|
"speed_scaled = (avg_speed - avg_speed.min()) / (avg_speed.max() - avg_speed.min())\n",
|
||||||
"road_type = np.random.choice(['Autobahn', 'Außerorts', 'Innerorts'], size=N, p=[0.32, 0.43, 0.25])\n",
|
"strength = 0.01\n",
|
||||||
|
"logits = np.zeros((N, 3))\n",
|
||||||
|
"logits[:, 0] = 1 - strength * speed_scaled # selten\n",
|
||||||
|
"logits[:, 1] = 1 # manchmal\n",
|
||||||
|
"logits[:, 2] = 1 + strength * speed_scaled # häufig\n",
|
||||||
|
"probs = softmax(logits)\n",
|
||||||
|
"speeding = [np.random.choice(['selten', 'manchmal', 'häufig'], p=p) for p in probs]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 6. Wetterbedingungen (nominal, Kontext)\n",
|
"# 5. Wetterbedingungen (nominal, Kontext)\n",
|
||||||
"weather = np.random.choice(['trocken', 'nass', 'winterlich'], size=N, p=[0.75, 0.2, 0.05])\n",
|
"weather = np.random.choice(['trocken', 'nass', 'winterlich'], size=N, p=[0.75, 0.2, 0.05])\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# 7. Fahrstrecke (metrisch, lognormal)\n",
|
"# 6. Fahrstrecke (metrisch, lognormal)\n",
|
||||||
"mu, sigma = np.log(12), 0.7\n",
|
"mu, sigma = np.log(12), 0.7\n",
|
||||||
"trip_distance = np.random.lognormal(mean=mu, sigma=sigma, size=N)\n",
|
"trip_distance = np.random.lognormal(mean=mu, sigma=sigma, size=N)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"# 7. Straßentyp (nominal, Kontext) – schwächere Abhängigkeit von trip_distance\n",
|
||||||
|
"road_probs = []\n",
|
||||||
|
"for dist in trip_distance:\n",
|
||||||
|
" if dist < 5:\n",
|
||||||
|
" road_probs.append([0.5, 0.35, 0.15])\n",
|
||||||
|
" elif dist < 20:\n",
|
||||||
|
" road_probs.append([0.2, 0.5, 0.3])\n",
|
||||||
|
" else:\n",
|
||||||
|
" road_probs.append([0.1, 0.4, 0.5])\n",
|
||||||
|
"road_type = [np.random.choice(['Innerorts', 'Außerorts', 'Autobahn'], p=prob) for prob in road_probs]\n",
|
||||||
|
"\n",
|
||||||
"# 8. Wochentag (nominal, Kontext)\n",
|
"# 8. Wochentag (nominal, Kontext)\n",
|
||||||
"weekday = np.random.choice(['Mo-Fr', 'Sa', 'So'], size=N, p=[0.7, 0.15, 0.15])\n"
|
"weekday = np.random.choice(['Mo-Fr', 'Sa', 'So'], size=N, p=[0.7, 0.15, 0.15])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -158,29 +181,183 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 52,
|
||||||
"id": "f8e0efb0",
|
"id": "f8e0efb0",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<div>\n",
|
||||||
|
"<style scoped>\n",
|
||||||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||||||
|
" vertical-align: middle;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe tbody tr th {\n",
|
||||||
|
" vertical-align: top;\n",
|
||||||
|
" }\n",
|
||||||
|
"\n",
|
||||||
|
" .dataframe thead th {\n",
|
||||||
|
" text-align: right;\n",
|
||||||
|
" }\n",
|
||||||
|
"</style>\n",
|
||||||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||||||
|
" <thead>\n",
|
||||||
|
" <tr style=\"text-align: right;\">\n",
|
||||||
|
" <th></th>\n",
|
||||||
|
" <th>avg_speed</th>\n",
|
||||||
|
" <th>shift_behavior</th>\n",
|
||||||
|
" <th>hard_brakes</th>\n",
|
||||||
|
" <th>speeding</th>\n",
|
||||||
|
" <th>weather</th>\n",
|
||||||
|
" <th>trip_distance</th>\n",
|
||||||
|
" <th>road_type</th>\n",
|
||||||
|
" <th>weekday</th>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </thead>\n",
|
||||||
|
" <tbody>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>0</th>\n",
|
||||||
|
" <td>64.494547</td>\n",
|
||||||
|
" <td>früh</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>manchmal</td>\n",
|
||||||
|
" <td>trocken</td>\n",
|
||||||
|
" <td>30.449962</td>\n",
|
||||||
|
" <td>Außerorts</td>\n",
|
||||||
|
" <td>Mo-Fr</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>1</th>\n",
|
||||||
|
" <td>44.139270</td>\n",
|
||||||
|
" <td>früh</td>\n",
|
||||||
|
" <td>3</td>\n",
|
||||||
|
" <td>häufig</td>\n",
|
||||||
|
" <td>trocken</td>\n",
|
||||||
|
" <td>11.911103</td>\n",
|
||||||
|
" <td>Autobahn</td>\n",
|
||||||
|
" <td>Mo-Fr</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>2</th>\n",
|
||||||
|
" <td>42.154349</td>\n",
|
||||||
|
" <td>spät</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>selten</td>\n",
|
||||||
|
" <td>trocken</td>\n",
|
||||||
|
" <td>6.086055</td>\n",
|
||||||
|
" <td>Autobahn</td>\n",
|
||||||
|
" <td>Mo-Fr</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>3</th>\n",
|
||||||
|
" <td>20.466814</td>\n",
|
||||||
|
" <td>normal</td>\n",
|
||||||
|
" <td>5</td>\n",
|
||||||
|
" <td>häufig</td>\n",
|
||||||
|
" <td>trocken</td>\n",
|
||||||
|
" <td>5.732684</td>\n",
|
||||||
|
" <td>Außerorts</td>\n",
|
||||||
|
" <td>Mo-Fr</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" <tr>\n",
|
||||||
|
" <th>4</th>\n",
|
||||||
|
" <td>46.917154</td>\n",
|
||||||
|
" <td>spät</td>\n",
|
||||||
|
" <td>2</td>\n",
|
||||||
|
" <td>selten</td>\n",
|
||||||
|
" <td>trocken</td>\n",
|
||||||
|
" <td>7.256758</td>\n",
|
||||||
|
" <td>Außerorts</td>\n",
|
||||||
|
" <td>Mo-Fr</td>\n",
|
||||||
|
" </tr>\n",
|
||||||
|
" </tbody>\n",
|
||||||
|
"</table>\n",
|
||||||
|
"</div>"
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
" avg_speed shift_behavior hard_brakes speeding weather trip_distance \\\n",
|
||||||
|
"0 64.494547 früh 2 manchmal trocken 30.449962 \n",
|
||||||
|
"1 44.139270 früh 3 häufig trocken 11.911103 \n",
|
||||||
|
"2 42.154349 spät 2 selten trocken 6.086055 \n",
|
||||||
|
"3 20.466814 normal 5 häufig trocken 5.732684 \n",
|
||||||
|
"4 46.917154 spät 2 selten trocken 7.256758 \n",
|
||||||
|
"\n",
|
||||||
|
" road_type weekday \n",
|
||||||
|
"0 Außerorts Mo-Fr \n",
|
||||||
|
"1 Autobahn Mo-Fr \n",
|
||||||
|
"2 Autobahn Mo-Fr \n",
|
||||||
|
"3 Außerorts Mo-Fr \n",
|
||||||
|
"4 Außerorts Mo-Fr "
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 52,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"df = pd.DataFrame({\n",
|
"df = pd.DataFrame({\n",
|
||||||
" 'avg_speed': avg_speed,\n",
|
" 'avg_speed': avg_speed,\n",
|
||||||
" 'shift_behavior': shift_behavior,\n",
|
" 'shift_behavior': shift_behavior,\n",
|
||||||
" 'hard_brakes': hard_brakes,\n",
|
" 'hard_brakes': hard_brakes,\n",
|
||||||
" 'speeding': speeding,\n",
|
" 'speeding': speeding,\n",
|
||||||
" 'road_type': road_type,\n",
|
|
||||||
" 'weather': weather,\n",
|
" 'weather': weather,\n",
|
||||||
" 'trip_distance': trip_distance,\n",
|
" 'trip_distance': trip_distance,\n",
|
||||||
|
" 'road_type': road_type,\n",
|
||||||
" 'weekday': weekday\n",
|
" 'weekday': weekday\n",
|
||||||
"})\n",
|
"})\n",
|
||||||
"\n",
|
"\n",
|
||||||
"df.head()\n"
|
"df.head()\n"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "f06e7c4d",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### VIF der Abhängigkeiten prüfen"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 53,
|
||||||
|
"id": "6bfb199a",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"Variance Inflation Factor (VIF) für ausgewählte Variabeln:\n",
|
||||||
|
" features VIF Factor\n",
|
||||||
|
"0 avg_speed 5.768491\n",
|
||||||
|
"1 trip_distance 2.469313\n",
|
||||||
|
"2 road_type_Außerorts 2.238422\n",
|
||||||
|
"3 speeding_manchmal 1.900947\n",
|
||||||
|
"4 speeding_selten 1.893981\n",
|
||||||
|
"5 road_type_Innerorts 1.587059\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"vif = pd.DataFrame()\n",
|
||||||
|
"columns = ['avg_speed', 'speeding', 'trip_distance', 'road_type']\n",
|
||||||
|
"vif_df = pd.get_dummies(df[columns], drop_first=True)\n",
|
||||||
|
"vif_df = vif_df.astype(float)\n",
|
||||||
|
"vif['features'] = vif_df.columns\n",
|
||||||
|
"vif['VIF Factor'] = [variance_inflation_factor(vif_df.values, i) for i in range(vif_df.shape[1])]\n",
|
||||||
|
"vif = vif.sort_values('VIF Factor', ascending=False).reset_index(drop=True)\n",
|
||||||
|
"print(\"\\nVariance Inflation Factor (VIF) für ausgewählte Variabeln:\")\n",
|
||||||
|
"print(vif)"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "base",
|
"display_name": "hft_ml",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@@ -194,7 +371,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.9"
|
"version": "3.11.10"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
|
|||||||
Reference in New Issue
Block a user