Skip to content

Commit b5f2693

Browse files
author
Jan Gutsche
committed
WIP: Analyze raw data
1 parent 4cffe34 commit b5f2693

File tree

1 file changed

+290
-7
lines changed

1 file changed

+290
-7
lines changed

ddlitlab2024/dataset/analyze_raw_data.ipynb

Lines changed: 290 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,43 +10,326 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": null,
13+
"execution_count": 1,
1414
"metadata": {},
1515
"outputs": [],
1616
"source": [
1717
"import sqlite3\n",
18+
"from concurrent.futures import ThreadPoolExecutor\n",
19+
"from contextlib import contextmanager\n",
20+
"from datetime import datetime, timedelta\n",
21+
"from pathlib import Path\n",
22+
"from typing import TypedDict\n",
1823
"\n",
1924
"import pandas as pd\n",
25+
"from mcap.reader import make_reader\n",
26+
"from mcap.summary import Summary\n",
27+
"from mcap_ros2.decoder import DecoderFactory\n",
2028
"\n",
2129
"from ddlitlab2024 import DB_PATH"
2230
]
2331
},
2432
{
2533
"cell_type": "code",
26-
"execution_count": null,
34+
"execution_count": 2,
2735
"metadata": {},
2836
"outputs": [],
2937
"source": [
30-
"con = sqlite3.connect(DB_PATH)"
38+
"BASE_DIR = Path(\"/srv/rosbags\") # Same as https://data.bit-bots.de/ROSbags/"
3139
]
3240
},
3341
{
3442
"cell_type": "code",
35-
"execution_count": null,
43+
"execution_count": 3,
44+
"metadata": {},
45+
"outputs": [],
46+
"source": [
47+
"DB_PATH = Path(\"/srv/ssd_nvm/dataset/ddlitlab2024/db/robocup_2024_german_open_2025.sqlite3\") # TODO Remove me"
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": 4,
53+
"metadata": {},
54+
"outputs": [],
55+
"source": [
56+
"# Connect read-only to the SQLite database\n",
57+
"conn = sqlite3.connect(f\"file:{DB_PATH}?mode=ro\", uri=True)"
58+
]
59+
},
60+
{
61+
"cell_type": "code",
62+
"execution_count": 5,
3663
"metadata": {},
3764
"outputs": [],
3865
"source": [
3966
"# Get mcaps used by the database\n",
4067
"query = \"\"\"\n",
4168
"SELECT original_file FROM Recording;\n",
4269
"\"\"\"\n",
43-
"df = pd.read_sql_query(query, con)"
70+
"files = pd.DataFrame()\n",
71+
"files[\"mcap_file_name\"] = pd.read_sql_query(query, conn)"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"execution_count": 6,
77+
"metadata": {},
78+
"outputs": [],
79+
"source": [
80+
"# Resolve an mcap file name to its full path by searching BASE_DIR recursively\n",
81+
def find_file(file_name, base_dir=None):
    """Locate the recording called *file_name* somewhere below *base_dir*.

    Args:
        file_name: Name of the file to look for; it is used as the pattern of
            a recursive glob.
        base_dir: Directory (a ``pathlib.Path``) that is searched recursively.
            Defaults to the notebook-level ``BASE_DIR``.

    Returns:
        The path of the match if there is exactly one. If there are several
        matches, a match whose path contains "untrimmed" is preferred (the
        first one in sorted order if more than one qualifies). ``None`` if
        nothing matches or if none of several matches is "untrimmed"; a
        message is printed in both of these cases.
    """
    if base_dir is None:
        base_dir = BASE_DIR

    # Sorted so that the outcome does not depend on directory traversal order.
    candidates = sorted(base_dir.rglob(file_name))

    if not candidates:
        print(f"File {file_name} not found")
        return None
    if len(candidates) == 1:
        return candidates[0]

    # Several copies exist: use the "untrimmed" version of the file if it
    # exists, no matter how many other copies were found next to it.
    for candidate in candidates:
        if "untrimmed" in str(candidate):
            return candidate

    print(f"File {file_name} found multiple times: {candidates}")
    return None
101+
"\n",
102+
"\n",
103+
"# Find the full file paths for each mcap file\n",
104+
"files[\"mcap_file_path\"] = files[\"mcap_file_name\"].apply(find_file)\n",
105+
"assert files[\"mcap_file_path\"].notnull().all(), \"Some mcap files were not found\""
106+
]
107+
},
108+
{
109+
"cell_type": "markdown",
110+
"metadata": {},
111+
"source": [
112+
"## Gather metrics:\n",
113+
"- File size [B]\n",
114+
"- Duration [s]\n",
115+
"- Image messages\n",
116+
"- IMU messages\n",
117+
"- Joint state messages\n",
118+
"- Joint command messages\n",
119+
"- Game state messages\n",
120+
"- Total number of messages (sum of the message counts above)"
121+
]
122+
},
123+
{
124+
"cell_type": "code",
125+
"execution_count": 7,
126+
"metadata": {},
127+
"outputs": [],
128+
"source": [
129+
"# Preparations\n",
130+
"\n",
131+
"\n",
132+
class Metric(TypedDict):
    """Per-recording statistics gathered for one MCAP file."""

    # Identification of the recording
    mcap_file_name: str  # bare file name
    mcap_file_path: Path  # full path of the file that was analyzed

    # Overall extent
    mcap_file_size_B: int  # size on disk in bytes
    duration_s: float  # time span covered by the messages, in seconds

    # Message counters
    num_images: int
    num_imu: int
    num_joint_states: int
    num_joint_commands: int
    num_game_states: int

    # Sum of the five counters above
    num_messages: int
143+
"\n",
144+
"\n",
145+
@contextmanager
def _mcap_reader(mcap_file_path: Path):
    """Yield an MCAP reader for *mcap_file_path*.

    The file is opened in binary mode and closed again when the ``with``
    block that uses this context manager is left, so the reader must not be
    used after that point.
    """
    stream = open(mcap_file_path, "rb")
    try:
        yield make_reader(stream, decoder_factories=[DecoderFactory()])
    finally:
        stream.close()
149+
"\n",
150+
"\n",
151+
def duration(summary: "Summary") -> timedelta:
    """Return the time span covered by the messages of an MCAP file.

    The span reaches from the earliest ``message_start_time`` to the latest
    ``message_end_time`` over all chunk indexes of *summary*. Both values are
    integer nanosecond timestamps.

    The difference is taken on the integers directly. Converting each
    timestamp to a naive local ``datetime`` first would give a wrong result
    when the two instants lie on different sides of a daylight-saving
    change, and would lose sub-microsecond precision in the float division.

    Args:
        summary: Summary of the MCAP file.

    Returns:
        The duration, rounded to microseconds.

    Raises:
        ValueError: If the summary contains no chunk indexes, so that no
            start or end time can be determined.
    """
    start_ns = min((chunk.message_start_time for chunk in summary.chunk_indexes), default=None)
    end_ns = max((chunk.message_end_time for chunk in summary.chunk_indexes), default=None)

    if start_ns is None:
        raise ValueError("No start time found in the MCAP file")
    if end_ns is None:
        raise ValueError("No end time found in the MCAP file")

    return timedelta(microseconds=(end_ns - start_ns) / 1_000)
165+
"\n",
166+
"\n",
167+
# Topics requested from each MCAP file when gathering metrics.
USED_TOPICS = [
    # Joint commands
    "/DynamixelController/command",
    # Camera images
    "/camera/image_proc",
    # NOTE(review): listed here but not counted by get_metrics - confirm
    # whether it should contribute to the image count.
    "/camera/image_to_record",
    # Game state
    "/gamestate",
    # IMU
    "/imu/data",
    # Joint states
    "/joint_states",
    # Inspected for the base_link -> base_footprint transform when a file
    # has no "/imu/data" channel.
    "/tf",
]
176+
]
177+
},
178+
{
179+
"cell_type": "code",
180+
"execution_count": null,
181+
"metadata": {},
182+
"outputs": [
183+
{
184+
"name": "stdout",
185+
"output_type": "stream",
186+
"text": [
187+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T16:30:37/ID_donna_2024-07-19T16:30:37_0.mcap\n",
188+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T11:31:28/ID_jack_2024-07-19T11:31:28_0.mcap\n",
189+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-17T15:38:04/ID_jack_2024-07-17T15:38:04_0.mcap\n",
190+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-18T13:29:30/ID_rory_2024-07-18T13:29:30_0.mcap\n",
191+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T11:48:16/ID_donna_2024-07-19T11:48:16_0.mcap\n",
192+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-18T13:54:46/ID_rory_2024-07-18T13:54:46_0.mcap\n",
193+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T11:08:32/ID_jack_2024-07-20T11:08:32_0.mcap\n",
194+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T09:50:25/ID_jack_2024-07-19T09:50:25_0.mcap\n",
195+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T17:27:57/ID_jack_2024-07-19T17:27:57_0.mcap\n",
196+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T18:29:22/ID_donna_2024-07-18T18:29:22_0.mcap\n",
197+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T18:29:35/ID_jack_2024-07-18T18:29:35_0.mcap\n",
198+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T18:31:02/ID_donna_2024-07-18T18:31:02_0.mcap\n",
199+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T18:42:01/ID_jack_2024-07-18T18:42:01_0.mcap\n",
200+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T16:17:13/ID_donna_2024-07-19T16:17:13_0.mcap\n",
201+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T11:30:19/ID_donna_2024-07-19T11:30:19_0.mcap\n",
202+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T17:58:04/ID_jack_2024-07-19T17:58:04_0.mcap\n",
203+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T19:28:32/ID_donna_2024-07-18T19:28:32_0.mcap\n",
204+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T18:47:57/ID_jack_2024-07-18T18:47:57_0.mcap\n",
205+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T15:56:08/ID_jack_2024-07-20T15:56:08_0.mcap\n",
206+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-20T15:27:30/ID_rory_2024-07-20T15:27:30_0.mcap\n",
207+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-18T13:52:36/ID_rory_2024-07-18T13:52:36_0.mcap\n",
208+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T18:38:45/ID_jack_2024-07-18T18:38:45_0.mcap\n",
209+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T18:47:16/ID_donna_2024-07-18T18_47_16_0.mcap\n",
210+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-20T15:45:49/ID_rory_2024-07-20T15:45:49_0.mcap\n",
211+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-20T15:34:27/ID_rory_2024-07-20T15:34:27_0.mcap\n",
212+
"Processing /srv/rosbags/robocup_2024/ID_amy_2024-07-18T10:26:27/ID_amy_2024-07-18T10:26:27_0.mcap\n",
213+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T11:47:41/ID_jack_2024-07-19T11:47:41_0.mcap\n",
214+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-19T17:45:15/ID_rory_2024-07-19T17:45:15_0.mcap\n",
215+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T19:48:52/ID_donna_2024-07-18T19:48:52_0.mcap\n",
216+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T17:11:35/ID_donna_2024-07-19T17:11:35_0.mcap\n",
217+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-19T17:27:54/ID_rory_2024-07-19T17:27:54_0.mcap\n",
218+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T11:29:50/ID_jack_2024-07-19T11:29:50_0.mcap\n",
219+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T15:29:51/ID_donna_2024-07-18T15:29:51_0.mcap\n",
220+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T17:05:54/ID_donna_2024-07-19T17:05:54_0.mcap\n",
221+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T15:27:14/ID_jack_2024-07-20T15:27:14_0.mcap\n",
222+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-20T15:26:55/ID_donna_2024-07-20T15:26:55_0.mcap\n",
223+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T19:56:19/ID_jack_2024-07-18T19:56:19_0.mcap\n",
224+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-18T10:50:54/ID_rory_2024-07-18T10:50:54_0.mcap\n",
225+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-20T15:49:20/ID_donna_2024-07-20T15:49:20_0.mcap\n",
226+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T17:45:13/ID_donna_2024-07-19T17:45:13_0.mcap\n",
227+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T18:01:33/ID_jack_2024-07-19T18:01:33_0.mcap\n",
228+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T15:21:13/ID_donna_2024-07-18T15:21:13_0.mcap\n",
229+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T11:19:40/ID_jack_2024-07-20T11:19:40_0.mcap\n",
230+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-17T18:34:41/ID_jack_2024-07-17T18:34:41_0.mcap\n",
231+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T19:51:25/ID_jack_2024-07-18T19:51:25_0.mcap\n",
232+
"Processing /srv/rosbags/robocup_2024/ID_amy_2024-07-18T10:32:52/ID_amy_2024-07-18T10:32:52_0.mcap\n",
233+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-17T15:35:46/ID_jack_2024-07-17T15:35:46_0.mcap\n",
234+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T15:48:43/ID_jack_2024-07-20T15:48:43_0.mcap\n",
235+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T17:35:46/ID_donna_2024-07-19T17:35:46_0.mcap\n",
236+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-19T18:00:20/ID_donna_2024-07-19T18:00:20_0.mcap\n",
237+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T19:28:29/ID_jack_2024-07-18T19:28:29_0.mcap\n",
238+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T19:46:13/ID_jack_2024-07-18T19:46:13_0.mcap\n",
239+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T19:38:20/ID_jack_2024-07-18T19:38:20_0.mcap\n",
240+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-17T19:16:57/ID_rory_2024-07-17T19:16:57_0.mcap\n",
241+
"Processing /srv/rosbags/robocup_2024/ID_donna_2024-07-18T15:00:02/ID_donna_2024-07-18T15:00:02_0.mcap\n",
242+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-18T18:31:03/ID_jack_2024-07-18T18:31:03_0.mcap\n",
243+
"Processing /srv/rosbags/robocup_2024/ID_rory_2024-07-20T15:40:46/ID_rory_2024-07-20T15:40:46_0.mcap\n",
244+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-20T15:47:38/ID_jack_2024-07-20T15:47:38_0.mcap\n",
245+
"Processing /srv/rosbags/robocup_2024/ID_jack_2024-07-19T17:46:07/ID_jack_2024-07-19T17:46:07_0.mcap\n",
246+
"Processing /srv/rosbags/german_open_2025/original_untrimmed/ID_rory_2025-03-16T14:02:56/ID_rory_2025-03-16T14:02:56_0.mcap\n"
247+
]
248+
}
249+
],
250+
"source": [
251+
def get_metrics(file_path: Path) -> Metric:
    """Collect size, duration and per-topic message counts for one MCAP file.

    IMU samples are counted from "/imu/data" when the file has such a
    channel. Otherwise every transform on "/tf" whose ``header.frame_id`` is
    "base_link" and whose ``child_frame_id`` is "base_footprint" counts as
    one IMU sample.

    Only topics that can change a counter are requested from the reader, so
    "/tf" is not decoded for files that have "/imu/data", and topics from
    ``USED_TOPICS`` without a counter are not decoded at all.

    NOTE(review): "/camera/image_to_record" is in ``USED_TOPICS`` but has no
    counter here; only "/camera/image_proc" contributes to ``num_images``.
    Confirm whether that is intended.

    Args:
        file_path: Path of the MCAP file to analyze.

    Returns:
        The filled-in ``Metric``; ``num_messages`` is the sum of the five
        individual counters.

    Raises:
        AssertionError: If the reader returns no summary for the file.
    """
    # Topic -> name of the Metric counter that one message on it increments.
    counter_for_topic = {
        "/camera/image_proc": "num_images",
        "/imu/data": "num_imu",
        "/joint_states": "num_joint_states",
        "/DynamixelController/command": "num_joint_commands",
        "/gamestate": "num_game_states",
    }

    with _mcap_reader(file_path) as reader:
        summary = reader.get_summary()
        assert summary is not None, "Summary is None"

        has_imu_data = any(channel.topic == "/imu/data" for channel in summary.channels.values())

        metric: Metric = {
            "mcap_file_name": file_path.name,
            "mcap_file_path": file_path,
            "mcap_file_size_B": file_path.stat().st_size,
            "duration_s": duration(summary).total_seconds(),
            "num_images": 0,
            "num_imu": 0,
            "num_joint_states": 0,
            "num_joint_commands": 0,
            "num_game_states": 0,
            "num_messages": 0,
        }

        def is_relevant(topic: str) -> bool:
            # "/imu/data" and "/tf" are mutually exclusive sources of IMU counts.
            if topic == "/imu/data":
                return has_imu_data
            if topic == "/tf":
                return not has_imu_data
            return topic in counter_for_topic

        topics = [topic for topic in USED_TOPICS if is_relevant(topic)]

        for _, channel, _, ros_msg in reader.iter_decoded_messages(topics=topics):
            if channel.topic == "/tf":
                metric["num_imu"] += sum(
                    1
                    for tf_msg in ros_msg.transforms
                    if tf_msg.child_frame_id == "base_footprint" and tf_msg.header.frame_id == "base_link"
                )
                continue

            counter = counter_for_topic.get(channel.topic)
            if counter is not None:
                metric[counter] += 1

    metric["num_messages"] = (
        metric["num_images"]
        + metric["num_imu"]
        + metric["num_joint_states"]
        + metric["num_joint_commands"]
        + metric["num_game_states"]
    )

    return metric
299+
"\n",
300+
"\n",
301+
"# Get the metrics for each mcap file in parallel\n",
302+
def get_metrics_for_all_files():
    """Run ``get_metrics`` on every file listed in ``files`` using a thread pool.

    Returns:
        A DataFrame with one row per file, in the same order as the rows of
        ``files``.
    """

    def analyze(mcap_path):
        # Announce the file first so progress is visible while workers run.
        print(f"Processing {mcap_path}")
        return get_metrics(mcap_path)

    mcap_paths = files["mcap_file_path"].tolist()

    with ThreadPoolExecutor() as pool:
        # map() yields results in input order regardless of completion order.
        collected = list(pool.map(analyze, mcap_paths))

    return pd.DataFrame(collected)
315+
"\n",
316+
"\n",
317+
"df = get_metrics_for_all_files()"
318+
]
319+
},
320+
{
321+
"cell_type": "code",
322+
"execution_count": null,
323+
"metadata": {},
324+
"outputs": [],
325+
"source": [
326+
"df"
44327
]
45328
}
46329
],
47330
"metadata": {
48331
"kernelspec": {
49-
"display_name": "Python 3",
332+
"display_name": "ddlitlab2024-bN9lk4bc-py3.10",
50333
"language": "python",
51334
"name": "python3"
52335
},
@@ -60,7 +343,7 @@
60343
"name": "python",
61344
"nbconvert_exporter": "python",
62345
"pygments_lexer": "ipython3",
63-
"version": "3.13.1"
346+
"version": "3.10.12"
64347
}
65348
},
66349
"nbformat": 4,

0 commit comments

Comments
 (0)