fix minor errors when running all ipynb
- src/02_basics/app_market/google-play-search.qmd +1 -1
- src/03_low_code/app_market_scraping/app_market_scraping.ipynb +10 -7
- src/03_low_code/video_transcripts/get_videos_for_youtube_channels.ipynb +2 -0
- src/04_use_case/forum/buergergeld_forum.ipynb +25 -0
- src/04_use_case/laws/Gesetze_im_Internet_Aktualitätendienst.ipynb +1 -1
src/02_basics/app_market/google-play-search.qmd
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: "Google-Play-App-Suche"
-description: "Dieses Tool durchsucht den Google Play Store nach Apps
+description: "Dieses Tool durchsucht den Google Play Store nach Apps, basierend auf einem Suchbegriff, und in einer übersichtliche Ergebnisliste darstellen."
 image: _9d81d3d2-f372-476b-beb6-fe6a4841739b.jpeg
 ---
src/03_low_code/app_market_scraping/app_market_scraping.ipynb
CHANGED
@@ -70,10 +70,10 @@
 "\n",
 "Eine CSV-Datei (`app_urls.csv`) erstellen, die eine Spalte url enthält, welche die URLs der Google Play Store Apps auflistet. Beispiel:\n",
 "\n",
-"
-"
-"https://play.google.com/store/apps/details?id=com.
-"https://play.google.com/store/apps/details?id=
+"```\n",
+"Title,Developer,Score,App ID,Link\n",
+"pflegecompass,compass private pflegeberatung,,com.app.pflegecompass,https://play.google.com/store/apps/details?id=com.app.pflegecompass\n",
+"Pflege App,De Rose Care Tech,,altenpflege.quiz,https://play.google.com/store/apps/details?id=altenpflege.quiz\n",
 "```\n",
 "\n",
 "Die CSV-Datei in ein Pandas DataFrame einlesen:"
@@ -87,8 +87,11 @@
 "source": [
 "import pandas as pd\n",
 "\n",
-"# CSV-Datei einlesen\n",
-"df = pd.read_csv('app_urls.csv')"
+"# CSV-Datei lokal einlesen\n",
+"# df = pd.read_csv('app_urls.csv')\n",
+"\n",
+"# CSV-Beispieldatei laden\n",
+"df = pd.read_csv(\"https://huggingface.co/spaces/datenwerkzeuge/CDL-Webscraping-Workshop-2025/resolve/main/src/assets/app_urls.csv\")"
 ]
 },
 {
@@ -116,7 +119,7 @@
 "app_info_list = []\n",
 "\n",
 "# Loop über die URLs in der CSV-Datei\n",
-"for url in df['
+"for url in df['Link']:\n",
 " app_id = extract_app_id(url)\n",
 " app_info = app(app_id, lang='en', country='us')\n",
 " app_info_list.append(app_info)\n",
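Read together, these hunks switch the example CSV to the hosted sample file and its `Link` column. A minimal sketch of the resulting flow, assuming the `google-play-scraper` package and a stand-in for the notebook's `extract_app_id` helper (which is defined outside the shown hunks):

```python
# Sketch only: extract_app_id below is an assumed stand-in for the notebook's helper.
from urllib.parse import urlparse, parse_qs

import pandas as pd
from google_play_scraper import app  # pip install google-play-scraper


def extract_app_id(url: str) -> str:
    # Pull the package name from the ?id= query parameter of a Play Store URL
    return parse_qs(urlparse(url).query)["id"][0]


# Hosted sample CSV referenced in the patched cell
df = pd.read_csv(
    "https://huggingface.co/spaces/datenwerkzeuge/CDL-Webscraping-Workshop-2025/resolve/main/src/assets/app_urls.csv"
)

app_info_list = []
for url in df["Link"]:  # the column name the fix switches to
    app_id = extract_app_id(url)
    app_info = app(app_id, lang="en", country="us")
    app_info_list.append(app_info)

print(pd.DataFrame(app_info_list)[["title", "score"]].head())
```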
src/03_low_code/video_transcripts/get_videos_for_youtube_channels.ipynb
CHANGED
@@ -69,6 +69,8 @@
 "metadata": {},
 "outputs": [],
 "source": [
+"import pandas as pd\n",
+"\n",
 "# Channel-IDs in ein Pandas DataFrame umwandeln\n",
 "df = pd.DataFrame(list(channel_ids.items()), columns=['Kanal', 'ID'])\n",
 "\n",
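The only change here is the missing `pandas` import; with it in place, the dict-to-DataFrame step behaves roughly as in this sketch (the channel IDs are placeholders, not values from the notebook):

```python
import pandas as pd

# Hypothetical channel_ids dict standing in for the one built earlier in the notebook
channel_ids = {
    "Beispielkanal A": "UCxxxxxxxxxxxxxxxxxxxxxx",
    "Beispielkanal B": "UCyyyyyyyyyyyyyyyyyyyyyy",
}

# Channel IDs to a two-column DataFrame, same call as the patched cell
df = pd.DataFrame(list(channel_ids.items()), columns=["Kanal", "ID"])
print(df)
```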
src/04_use_case/forum/buergergeld_forum.ipynb
CHANGED
@@ -95,6 +95,31 @@
 "Um das wiederholte Abfragen gleicher Inhalte zu vermeiden, können die gesammelten Seiten als Ordner heruntergeladen werden: [buergergeld_forum.zip](https://huggingface.co/spaces/datenwerkzeuge/CDL-Webscraping-Workshop-2025/resolve/main/src/assets/buergergeld_forum.zip)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# prompt: lade die datei von https://www.google.com/url?q=https%3A%2F%2Fhuggingface.co%2Fspaces%2Fdatenwerkzeuge%2FCDL-Webscraping-Workshop-2025%2Fresolve%2Fmain%2Fsrc%2Fassets%2Fbuergergeld_forum.zip und entpacke sie im wurzel ordner \"buergergeld_forum\"\n",
+"\n",
+"import requests\n",
+"import os\n",
+"from io import BytesIO\n",
+"from zipfile import ZipFile\n",
+"\n",
+"# Download the zip file\n",
+"url = \"https://huggingface.co/spaces/datenwerkzeuge/CDL-Webscraping-Workshop-2025/resolve/main/src/assets/buergergeld_forum.zip\"\n",
+"response = requests.get(url)\n",
+"response.raise_for_status() # Raise an exception for bad status codes\n",
+"\n",
+"# Extract the zip file\n",
+"with ZipFile(BytesIO(response.content)) as zip_file:\n",
+" zip_file.extractall(\"buergergeld_forum\")\n",
+"\n",
+"print(\"File downloaded and extracted successfully!\")"
+]
+},
 {
 "cell_type": "markdown",
 "metadata": {},
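A quick way to confirm the new download cell worked is to list what it extracted; the file layout inside the archive is not part of the diff, so the `*.html` pattern below is an assumption:

```python
from pathlib import Path

# List the pages extracted by the cell above; adjust the pattern if the archive
# uses a different file extension or nesting.
pages = sorted(Path("buergergeld_forum").rglob("*.html"))
print(f"{len(pages)} saved pages found")
for page in pages[:5]:
    print(page)
```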
src/04_use_case/laws/Gesetze_im_Internet_Aktualitätendienst.ipynb
CHANGED
@@ -53,7 +53,7 @@
 "source": [
 "# prompt: installiere die bibliothek feedparser\n",
 "\n",
-"
+"! pip install feedparser\n"
 ]
 },
 {
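Once `feedparser` is installed, the notebook can read the Aktualitätendienst RSS feed along these lines; the feed URL below is a placeholder assumption, not taken from the notebook:

```python
import feedparser

# Placeholder: substitute the feed address used later in the notebook
FEED_URL = "https://www.gesetze-im-internet.de/aktdienst.rss"  # assumed URL

feed = feedparser.parse(FEED_URL)
for entry in feed.entries[:5]:
    # Each entry exposes at least a title and a link
    print(entry.title, "-", entry.link)
```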