Skip to content

Commit 5d5bc91

Browse files
merging db more general
1 parent fb89383 commit 5d5bc91

File tree

3 files changed

+19
-38
lines changed

3 files changed

+19
-38
lines changed

src/merge_db.py

Lines changed: 8 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,23 @@
11
import sqlite3
2+
from init_tables import *
23

3-
# 连接到两个源数据库
4-
conn = sqlite3.connect('movies3.db')
5-
6-
# 连接到目标数据库
4+
conn = sqlite3.connect('movies的副本.db')
75
conn_merged = sqlite3.connect('merged.db')
86

9-
# 创建游标对象
7+
108
cur1 = conn.cursor()
119
cur_merged = conn_merged.cursor()
1210

13-
# 创建目标数据库的movie表结构
14-
cur_merged.execute('''
15-
CREATE TABLE IF NOT EXISTS movies (
16-
id INTEGER PRIMARY KEY,
17-
title TEXT,
18-
rating REAL,
19-
rating_count INTEGER,
20-
pubdate TEXT,
21-
year TEXT,
22-
genres TEXT,
23-
durations TEXT,
24-
cover_url TEXT,
25-
sharing_url TEXT,
26-
countries TEXT,
27-
url TEXT,
28-
directors TEXT,
29-
actors TEXT,
30-
update_time TEXT
31-
)
32-
''')
11+
init_movies_table('merged.db')
3312

3413
# 从第一个数据库中插入数据
3514
cur1.execute('SELECT * FROM movies')
3615
rows1 = cur1.fetchall()
3716
cur_merged.executemany('''
38-
INSERT INTO movies (id, title, rating, rating_count, pubdate, year, genres, durations, cover_url,
39-
sharing_url, countries, url, directors, actors, update_time)
40-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
17+
INSERT INTO movies (id, title, rating, rating_count, pubdate,
18+
year, genres, durations, cover_url, sharing_url,
19+
countries, url, directors, actors, is_visited, update_time)
20+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
4121
ON CONFLICT(id) DO UPDATE SET
4222
title=excluded.title,
4323
rating=excluded.rating,

src/movies_crawler.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,19 @@ def get_recommendations(db_path, data_count, round):
150150

151151
log_message(f"getting related movies and doulists for movie: {movie_title}",'movie_log.txt')
152152
get_related(db_path, movie_id)
153-
doulists = get_all_unvisited(db_path, 'doulists', 'id')
154-
for doulist_id in doulists:
155-
log_message(f"\tworking on doulist: {doulist_id}",'movie_log.txt')
156-
crawl_entire_doulist(db_path, data_count, doulist_id)
157-
mark_visited(db_path, 'doulists', 'id', doulist_id)
153+
# doulists = get_all_unvisited(db_path, 'doulists', 'id')
154+
# for doulist_id in doulists:
155+
# log_message(f"\tworking on doulist: {doulist_id}",'movie_log.txt')
156+
# crawl_entire_doulist(db_path, data_count, doulist_id)
157+
# mark_visited(db_path, 'doulists', 'id', doulist_id)
158158

159159
log_message(f"getting related user comments for movie: {movie_title}",'movie_log.txt')
160160
get_interests(db_path, movie_id)
161-
users = get_all_unvisited(db_path, 'interests', 'user_id')
162-
for user_id in users:
163-
log_message(f"\tworking on user:{user_id}",'movie_log.txt')
164-
crawl_entire_user(db_path, data_count, user_id)
165-
mark_visited(db_path, 'interests', 'user_id', user_id)
161+
# users = get_all_unvisited(db_path, 'interests', 'user_id')
162+
# for user_id in users:
163+
# log_message(f"\tworking on user:{user_id}",'movie_log.txt')
164+
# crawl_entire_user(db_path, data_count, user_id)
165+
# mark_visited(db_path, 'interests', 'user_id', user_id)
166166

167167
mark_visited(db_path, 'movies', 'id', movie_id)
168168

src/test.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,4 @@
4646
insert_doulists('movies.db', data)
4747
start=10
4848
print(start > data.get('total', start))
49+

0 commit comments

Comments
 (0)