update abhaengigkeit
This commit is contained in:
@@ -45,7 +45,7 @@
|
||||
" Anzahl starker Bremsvorgänge mit hoher Verzögerung (>0,3 g). Modellierung z.B. als Poisson-verteilte Zufallsvariable mit geringem Mittelwert (wenige Ereignisse pro 100 km). Harte Bremsmanöver korrelieren mit aggressiver Fahrweise und sind prädiktiv für Fahrerwechsel. \n",
|
||||
" → Quelle: [Tesla Safety Score Definition](https://www.findmyelectric.com/blog/tesla-safety-score-beta-explained)\n",
|
||||
"\n",
|
||||
"4. **Geschwindigkeitsüberschreitungen (ordinal):** \n",
|
||||
"4. **Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien):** \n",
|
||||
" Häufigkeit bzw. Ausmaß, mit dem Tempolimits überschritten werden. Kategorisierung z.B. in: \n",
|
||||
" - *selten*, \n",
|
||||
" - *manchmal*, \n",
|
||||
@@ -58,24 +58,26 @@
|
||||
"\n",
|
||||
"#### Nicht erklärende Variablen:\n",
|
||||
"\n",
|
||||
"5. **Straßentyp (nominal, 3 Kategorien):** \n",
|
||||
" - *Autobahn* (~30–33 %) \n",
|
||||
" - *Außerorts* (~43 %) \n",
|
||||
" - *Innerorts* (~24–27 %) \n",
|
||||
" → Modelliert als Multinomialverteilung. Kontextvariable ohne direkte Prädiktionswirkung. \n",
|
||||
" → Quelle: [Klimaschutzinstrumente im Verkehr](https://openumwelt.de/server/api/core/bitstreams/07bd23f9-ff0a-41e5-b89b-8d7baf0a5c36/content)\n",
|
||||
"\n",
|
||||
"6. **Wetterbedingungen (nominal, 3 Kategorien):** \n",
|
||||
"5. **Wetterbedingungen (nominal, 3 Kategorien):** \n",
|
||||
" - *trocken* (~70–75 %) \n",
|
||||
" - *nass* (~20–25 %) \n",
|
||||
" - *winterlich* (<5 %) \n",
|
||||
" → Modelliert als Multinomialverteilung. Kontextvariable ohne direkte Prädiktionswirkung. \n",
|
||||
" Die Einteilung basiert auf Daten des Statistischen Bundesamtes: Etwa 70–75 % aller Fahrten finden unter trockenen Bedingungen statt, 20–25 % bei Nässe und unter 5 % bei Schnee oder Glätte.\n",
|
||||
"\n",
|
||||
"7. **Fahrstrecke (metrisch):** \n",
|
||||
"6. **Fahrstrecke (metrisch):** \n",
|
||||
" Länge der Fahrt in km (z.B. lognormalverteilt mit einem Median von ca. 12 km). Kontextvariable, nicht direkt erklärend. \n",
|
||||
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"7. **Straßentyp (nominal, 3 Kategorien):** \n",
|
||||
" - *Autobahn* (~30–33 %) \n",
|
||||
" - *Außerorts* (~43 %) \n",
|
||||
" - *Innerorts* (~24–27 %) \n",
|
||||
" → Modelliert als Multinomialverteilung. Kontextvariable ohne direkte Prädiktionswirkung. \n",
|
||||
" → Quelle: [Klimaschutzinstrumente im Verkehr](https://openumwelt.de/server/api/core/bitstreams/07bd23f9-ff0a-41e5-b89b-8d7baf0a5c36/content)\n",
|
||||
"\n",
|
||||
"8. **Wochentag (nominal, 2–3 Kategorien):** \n",
|
||||
" Z. B. *Werktag* vs. *Wochenende* oder *Mo–Fr*, *Sa*, *So*. Modellierung als kategoriale Variable. Kontextvariable. \n",
|
||||
" → Quelle: [Mobilität in Deutschland 2017 – Ergebnisbericht](https://www.bmv.de/SharedDocs/DE/Anlage/G/mid-ergebnisbericht.pdf)\n"
|
||||
@@ -91,7 +93,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 50,
|
||||
"id": "97ba29a9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -102,6 +104,8 @@
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import seaborn as sns\n",
|
||||
"from scipy.stats import norm, multinomial, poisson, lognorm, bernoulli\n",
|
||||
"import statsmodels.api as sm\n",
|
||||
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
|
||||
"\n",
|
||||
"np.random.seed(11)\n",
|
||||
"N = 1_000_000"
|
||||
@@ -117,7 +121,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 51,
|
||||
"id": "61247da5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@@ -131,21 +135,40 @@
|
||||
"# 3. Harte Bremsmanöver (metrisch, Poisson)\n",
|
||||
"hard_brakes = np.random.poisson(lam=2, size=N)\n",
|
||||
"\n",
|
||||
"# 4. Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien)\n",
|
||||
"speeding = np.random.choice(['selten', 'manchmal', 'häufig'], size=N, p=[0.6, 0.3, 0.1])\n",
|
||||
"# 4. Geschwindigkeitsüberschreitungen (ordinal, 3 Kategorien) – Abhängigkeit von avg_speed\n",
|
||||
"def softmax(x):\n",
|
||||
" e_x = np.exp(x - np.max(x, axis=1, keepdims=True))\n",
|
||||
" return e_x / e_x.sum(axis=1, keepdims=True)\n",
|
||||
"\n",
|
||||
"# 5. Straßentyp (nominal, Kontext)\n",
|
||||
"road_type = np.random.choice(['Autobahn', 'Außerorts', 'Innerorts'], size=N, p=[0.32, 0.43, 0.25])\n",
|
||||
"speed_scaled = (avg_speed - avg_speed.min()) / (avg_speed.max() - avg_speed.min())\n",
|
||||
"strength = 0.01\n",
|
||||
"logits = np.zeros((N, 3))\n",
|
||||
"logits[:, 0] = 1 - strength * speed_scaled # selten\n",
|
||||
"logits[:, 1] = 1 # manchmal\n",
|
||||
"logits[:, 2] = 1 + strength * speed_scaled # häufig\n",
|
||||
"probs = softmax(logits)\n",
|
||||
"speeding = [np.random.choice(['selten', 'manchmal', 'häufig'], p=p) for p in probs]\n",
|
||||
"\n",
|
||||
"# 6. Wetterbedingungen (nominal, Kontext)\n",
|
||||
"# 5. Wetterbedingungen (nominal, Kontext)\n",
|
||||
"weather = np.random.choice(['trocken', 'nass', 'winterlich'], size=N, p=[0.75, 0.2, 0.05])\n",
|
||||
"\n",
|
||||
"# 7. Fahrstrecke (metrisch, lognormal)\n",
|
||||
"# 6. Fahrstrecke (metrisch, lognormal)\n",
|
||||
"mu, sigma = np.log(12), 0.7\n",
|
||||
"trip_distance = np.random.lognormal(mean=mu, sigma=sigma, size=N)\n",
|
||||
"\n",
|
||||
"# 7. Straßentyp (nominal, Kontext) – schwächere Abhängigkeit von trip_distance\n",
|
||||
"road_probs = []\n",
|
||||
"for dist in trip_distance:\n",
|
||||
" if dist < 5:\n",
|
||||
" road_probs.append([0.5, 0.35, 0.15])\n",
|
||||
" elif dist < 20:\n",
|
||||
" road_probs.append([0.2, 0.5, 0.3])\n",
|
||||
" else:\n",
|
||||
" road_probs.append([0.1, 0.4, 0.5])\n",
|
||||
"road_type = [np.random.choice(['Innerorts', 'Außerorts', 'Autobahn'], p=prob) for prob in road_probs]\n",
|
||||
"\n",
|
||||
"# 8. Wochentag (nominal, Kontext)\n",
|
||||
"weekday = np.random.choice(['Mo-Fr', 'Sa', 'So'], size=N, p=[0.7, 0.15, 0.15])\n"
|
||||
"weekday = np.random.choice(['Mo-Fr', 'Sa', 'So'], size=N, p=[0.7, 0.15, 0.15])"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -158,29 +181,183 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 52,
|
||||
"id": "f8e0efb0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>avg_speed</th>\n",
|
||||
" <th>shift_behavior</th>\n",
|
||||
" <th>hard_brakes</th>\n",
|
||||
" <th>speeding</th>\n",
|
||||
" <th>weather</th>\n",
|
||||
" <th>trip_distance</th>\n",
|
||||
" <th>road_type</th>\n",
|
||||
" <th>weekday</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>64.494547</td>\n",
|
||||
" <td>früh</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>manchmal</td>\n",
|
||||
" <td>trocken</td>\n",
|
||||
" <td>30.449962</td>\n",
|
||||
" <td>Außerorts</td>\n",
|
||||
" <td>Mo-Fr</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>44.139270</td>\n",
|
||||
" <td>früh</td>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>häufig</td>\n",
|
||||
" <td>trocken</td>\n",
|
||||
" <td>11.911103</td>\n",
|
||||
" <td>Autobahn</td>\n",
|
||||
" <td>Mo-Fr</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>42.154349</td>\n",
|
||||
" <td>spät</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>selten</td>\n",
|
||||
" <td>trocken</td>\n",
|
||||
" <td>6.086055</td>\n",
|
||||
" <td>Autobahn</td>\n",
|
||||
" <td>Mo-Fr</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>20.466814</td>\n",
|
||||
" <td>normal</td>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>häufig</td>\n",
|
||||
" <td>trocken</td>\n",
|
||||
" <td>5.732684</td>\n",
|
||||
" <td>Außerorts</td>\n",
|
||||
" <td>Mo-Fr</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>46.917154</td>\n",
|
||||
" <td>spät</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>selten</td>\n",
|
||||
" <td>trocken</td>\n",
|
||||
" <td>7.256758</td>\n",
|
||||
" <td>Außerorts</td>\n",
|
||||
" <td>Mo-Fr</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" avg_speed shift_behavior hard_brakes speeding weather trip_distance \\\n",
|
||||
"0 64.494547 früh 2 manchmal trocken 30.449962 \n",
|
||||
"1 44.139270 früh 3 häufig trocken 11.911103 \n",
|
||||
"2 42.154349 spät 2 selten trocken 6.086055 \n",
|
||||
"3 20.466814 normal 5 häufig trocken 5.732684 \n",
|
||||
"4 46.917154 spät 2 selten trocken 7.256758 \n",
|
||||
"\n",
|
||||
" road_type weekday \n",
|
||||
"0 Außerorts Mo-Fr \n",
|
||||
"1 Autobahn Mo-Fr \n",
|
||||
"2 Autobahn Mo-Fr \n",
|
||||
"3 Außerorts Mo-Fr \n",
|
||||
"4 Außerorts Mo-Fr "
|
||||
]
|
||||
},
|
||||
"execution_count": 52,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = pd.DataFrame({\n",
|
||||
" 'avg_speed': avg_speed,\n",
|
||||
" 'shift_behavior': shift_behavior,\n",
|
||||
" 'hard_brakes': hard_brakes,\n",
|
||||
" 'speeding': speeding,\n",
|
||||
" 'road_type': road_type,\n",
|
||||
" 'weather': weather,\n",
|
||||
" 'trip_distance': trip_distance,\n",
|
||||
" 'road_type': road_type,\n",
|
||||
" 'weekday': weekday\n",
|
||||
"})\n",
|
||||
"\n",
|
||||
"df.head()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "f06e7c4d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### VIF der Abhängigkeiten prüfen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 53,
|
||||
"id": "6bfb199a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"Variance Inflation Factor (VIF) für ausgewählte Variabeln:\n",
|
||||
" features VIF Factor\n",
|
||||
"0 avg_speed 5.768491\n",
|
||||
"1 trip_distance 2.469313\n",
|
||||
"2 road_type_Außerorts 2.238422\n",
|
||||
"3 speeding_manchmal 1.900947\n",
|
||||
"4 speeding_selten 1.893981\n",
|
||||
"5 road_type_Innerorts 1.587059\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"vif = pd.DataFrame()\n",
|
||||
"columns = ['avg_speed', 'speeding', 'trip_distance', 'road_type']\n",
|
||||
"vif_df = pd.get_dummies(df[columns], drop_first=True)\n",
|
||||
"vif_df = vif_df.astype(float)\n",
|
||||
"vif['features'] = vif_df.columns\n",
|
||||
"vif['VIF Factor'] = [variance_inflation_factor(vif_df.values, i) for i in range(vif_df.shape[1])]\n",
|
||||
"vif = vif.sort_values('VIF Factor', ascending=False).reset_index(drop=True)\n",
|
||||
"print(\"\\nVariance Inflation Factor (VIF) für ausgewählte Variabeln:\")\n",
|
||||
"print(vif)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"display_name": "hft_ml",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@@ -194,7 +371,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
||||
Reference in New Issue
Block a user