diff --git a/football_matches/scraping.ipynb b/football_matches/scraping.ipynb index 23609e6..2b37ad0 100644 --- a/football_matches/scraping.ipynb +++ b/football_matches/scraping.ipynb @@ -620,6 +620,15 @@ " data = requests.get(f\"https://fbref.com{links[0]}\")\n", " shooting = pd.read_html(data.text, match=\"Shooting\")[0]\n", " shooting.columns = shooting.columns.droplevel()\n", + " \n", + " newShootingCols = [\"Date\", \"Sh\", \"SoT\", \"Dist\", \"FK\", \"PK\", \"PKatt\"]\n", + " \n", + " # In less recent seasons, some of the expected shooting columns might not be present.\n", + " # This loop avoids a merging error by creating empty columns\n", + " for newCol in newShootingCols:\n", + " if newCol not in shooting.columns:\n", + " shooting[newCol] = np.nan\n", + " \n", " try:\n", " team_data = matches.merge(shooting[[\"Date\", \"Sh\", \"SoT\", \"Dist\", \"FK\", \"PK\", \"PKatt\"]], on=\"Date\")\n", " except ValueError:\n",