Skip to content

Commit 39ffee7

Browse files
committed
Add WEKO patch to delay file content task to avoid ES version conflict
1 parent 9e0c147 commit 39ffee7

18 files changed

Lines changed: 31529 additions & 2242 deletions
.github/patches/weko-delay-file-content-task.patch

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
diff --git a/modules/weko-deposit/weko_deposit/api.py b/modules/weko-deposit/weko_deposit/api.py
2+
index XXXXXXX..XXXXXXX 100644
3+
--- a/modules/weko-deposit/weko_deposit/api.py
4+
+++ b/modules/weko-deposit/weko_deposit/api.py
5+
@@ -1025,7 +1025,8 @@ class WekoDeposit(Deposit):
6+
self.revision_id)
7+
# Upload pdf file content to Elasticsearch
8+
from .tasks import extract_pdf_and_update_file_contents
9+
- extract_pdf_and_update_file_contents.apply_async((reading_targets, str(self.pid.object_uuid)))
10+
+ # Add countdown to avoid version conflict with web process
11+
+ extract_pdf_and_update_file_contents.apply_async((reading_targets, str(self.pid.object_uuid)), countdown=10)
12+
except TransportError as err:
13+
if self.jrc.get('content'):
14+
for content in self.jrc['content']:

.github/scripts/setup_weko.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ case "$COMMAND" in
4747
patch -d "${WEKO_ROOT}" -p1 < "${SCRIPT_DIR}/../patches/weko-oauth2-insecure-transport.patch"
4848
# Apply patch to fix chardet TypeError on ZIP filename handling
4949
patch -d "${WEKO_ROOT}" -p1 < "${SCRIPT_DIR}/../patches/weko-chardet-fix.patch"
50+
# Apply patch to delay file content extraction task to avoid ES version conflict
51+
patch -d "${WEKO_ROOT}" -p1 < "${SCRIPT_DIR}/../patches/weko-delay-file-content-task.patch"
5052
# Generate self-signed certificate for WEKO nginx with SAN for IP address
5153
mkdir -p "${WEKO_ROOT}/nginx/keys"
5254
openssl req -x509 -nodes -days 365 -newkey rsa:2048 \
@@ -89,6 +91,44 @@ db.session.commit()
8991
print('Updated mapping 30002')
9092
"
9193

94+
# Grant contributor access to Sample Index
95+
echo "=== Granting Contributor Access to Sample Index ==="
96+
docker compose -f "${compose_file}" exec -T web invenio shell -c '
97+
from weko_index_tree.models import Index
98+
from weko_index_tree.utils import delete_index_trees_from_redis
99+
from invenio_db import db
100+
index = Index.query.filter_by(index_name_english="Sample Index").first()
101+
if not index:
102+
raise Exception("Sample Index not found")
103+
print(f"Before: contribute_role={index.contribute_role}, browsing_role={index.browsing_role}, public_state={index.public_state}")
104+
index.contribute_role = "1,2,3,4,-98,-99"
105+
index.browsing_role = "1,2,3,4,-98,-99"
106+
index.public_state = True
107+
db.session.commit()
108+
# Clear Redis cache so API returns fresh data
109+
for lang in ["en", "ja"]:
110+
delete_index_trees_from_redis(lang)
111+
print(f"After: contribute_role={index.contribute_role}, browsing_role={index.browsing_role}, public_state={index.public_state}")
112+
'
113+
114+
# Grant index-tree-access permission to Contributor role
115+
echo "=== Granting index-tree-access to Contributor ==="
116+
docker compose -f "${compose_file}" exec -T web invenio shell -c '
117+
from invenio_access.models import ActionRoles, Role
118+
from invenio_db import db
119+
role = Role.query.filter_by(name="Contributor").first()
120+
if not role:
121+
raise Exception("Contributor role not found")
122+
existing = ActionRoles.query.filter_by(action="index-tree-access", role_id=role.id).first()
123+
if existing:
124+
print(f"index-tree-access already granted to Contributor (role_id={role.id})")
125+
else:
126+
ar = ActionRoles(action="index-tree-access", role_id=role.id)
127+
db.session.add(ar)
128+
db.session.commit()
129+
print(f"Granted index-tree-access to Contributor (role_id={role.id})")
130+
'
131+
92132
# Validate SWORD mapping 30002
93133
echo "=== SWORD Mapping Validation (30002) ==="
94134
validation_result=$(docker compose -f "${compose_file}" exec -T web invenio shell -c '

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ requests>=2.26.0
99
PyYAML>=5.4.1
1010
matplotlib>=3.4.0
1111
seaborn>=0.11.0
12-
python-dotenv>=0.19.0
12+
python-dotenv>=0.19.0
13+
python-docx>=0.8.11

scripts/grdm.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,9 +283,9 @@ def get_select_file_draggable_locator(page, provider):
283283
def get_select_file_draggable_xpath(name):
284284
return f'//*[contains(@class, "tb-expand-icon-holder")]//*[contains(@class, "file-extension")]/../../following-sibling::*[contains(@class, "title-text")]//*[text() = "{name}"]/../..'
285285

286-
async def wait_for_uploaded(page, filename):
287-
await expect(page.locator(f'//*[text() = "{filename}"]/../following-sibling::*//*[@role = "progressbar"]')).to_have_count(0, timeout=30000)
288-
await expect(get_select_file_title_locator(page, filename)).to_be_visible(timeout=1000)
286+
async def wait_for_uploaded(page, filename, timeout=30000):
287+
await expect(page.locator(f'//*[text() = "{filename}"]/../following-sibling::*//*[@role = "progressbar"]')).to_have_count(0, timeout=timeout)
288+
await expect(get_select_file_title_locator(page, filename)).to_be_visible(timeout=timeout)
289289

290290
def _bytes_to_data_url(byte_data, mime_type="application/octet-stream"):
291291
"""バイト配列をDataURLに変換"""
@@ -334,7 +334,6 @@ async def drop_file(page, element_locator, path):
334334

335335
async def drag_and_drop(page, source, dest):
336336
await expect(source).to_have_class(re.compile('.*ui-draggable.*'))
337-
await expect(dest).to_have_class(re.compile('.*ui-droppable.*'))
338337

339338
center_coordinates_source = await source.evaluate('''element => {
340339
const rect = element.getBoundingClientRect();

scripts/metadata_v2025.py

Lines changed: 72 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from __future__ import annotations
1818

1919
from enum import Enum
20-
from typing import Dict
20+
from typing import Any, Dict
2121

2222

2323
class FieldType(Enum):
@@ -48,18 +48,19 @@ class ProjectMetadataForm:
4848
"プロジェクトの分野": FieldType.POWER_SELECT,
4949
}
5050

51-
def __init__(self, page):
51+
def __init__(self, page, parent_locator=None):
5252
self.page = page
53+
self._root = parent_locator or page
5354

5455
def _get_locator(self, label: str, field_type: FieldType):
5556
base = "//*"
5657
match field_type:
5758
case FieldType.INPUT:
58-
return self.page.locator(
59+
return self._root.locator(
5960
f'{base}[contains(text(), "{label}")]/../following-sibling::div[1]//input'
6061
)
6162
case FieldType.POWER_SELECT:
62-
return self.page.locator(
63+
return self._root.locator(
6364
f'{base}[contains(text(), "{label}")]/../following-sibling::div[1]'
6465
)
6566
case _:
@@ -81,7 +82,7 @@ async def fill(self, label: str, value: str) -> None:
8182
await locator.fill(value)
8283
case FieldType.POWER_SELECT:
8384
await locator.locator(".ember-power-select-trigger").click()
84-
option = self.page.locator(
85+
option = self._root.locator(
8586
f'//li[contains(@class, "ember-power-select-option") and contains(., "{value}")]'
8687
)
8788
await option.click()
@@ -102,7 +103,7 @@ async def fill_power_select_by_search(self, label: str, value: str) -> None:
102103
field_type = self.FIELDS[label]
103104
locator = self._get_locator(label, field_type)
104105
await locator.locator(".ember-power-select-trigger").click()
105-
search = self.page.locator(".ember-power-select-search-input")
106+
search = self._root.locator(".ember-power-select-search-input")
106107
await search.fill(value)
107108
await search.press("Enter")
108109

@@ -115,6 +116,7 @@ class FileMetadataForm:
115116

116117
FIELDS: Dict[str, FieldType] = {
117118
# Basic info
119+
"ファイル種別": FieldType.SELECT,
118120
"データ No.": FieldType.INPUT,
119121
"データの名称または論文表題 (日本語)": FieldType.INPUT,
120122
"Title (English)": FieldType.INPUT,
@@ -139,10 +141,22 @@ class FileMetadataForm:
139141
"リポジトリURL・DOIリンク": FieldType.INPUT,
140142
# Creators
141143
"データ作成者": FieldType.TABLE,
144+
"著者名": FieldType.TABLE,
142145
# Hosting institution
143146
"データ管理機関 (日本語)": FieldType.INPUT,
144147
"Hosting institution (English)": FieldType.INPUT,
145148
"データ管理機関コード": FieldType.INPUT,
149+
# Bibliographic specific fields
150+
"論文(出版社版)のDOI": FieldType.INPUT,
151+
"論文の種類": FieldType.SELECT,
152+
"掲載誌名 (日本語)": FieldType.INPUT,
153+
"Journal Name (English)": FieldType.INPUT,
154+
"発行年月": FieldType.INPUT,
155+
"巻": FieldType.INPUT,
156+
"号": FieldType.INPUT,
157+
"掲載ページ (開始)": FieldType.INPUT,
158+
"掲載ページ (終了)": FieldType.INPUT,
159+
"学術論文を掲載した「機関リポジトリ等の情報基盤」のDOI": FieldType.INPUT,
146160
# Data manager
147161
"データ管理者の種類": FieldType.SELECT,
148162
"データ管理者の e-Rad 研究者番号": FieldType.INPUT,
@@ -159,42 +173,51 @@ class FileMetadataForm:
159173
"Remarks (English)": FieldType.TEXTAREA,
160174
# Metadata access
161175
"メタデータのアクセス権": FieldType.SELECT,
176+
# Publication specific extra
177+
"査読の有無": FieldType.SELECT,
178+
"版情報": FieldType.SELECT,
162179
}
163180

164-
def __init__(self, page):
181+
def __init__(self, page, parent_locator=None):
165182
self.page = page
183+
self._root = parent_locator or page
166184

167185
def _get_locator(self, label: str, field_type: FieldType):
168186
# Special case: 概略データ量 has different xpath
169187
if label == "概略データ量":
170-
return self.page.locator(
188+
return self._root.locator(
171189
'//label[contains(text(), "概略データ量")]/../..//input[contains(@class, "form-control")]'
172190
)
191+
exact_labels = {"号"}
192+
if label in exact_labels:
193+
label_xpath = f'//label[normalize-space(.) = "{label}"]'
194+
else:
195+
label_xpath = f'//label[contains(text(), "{label}")]'
173196

174197
match field_type:
175198
case FieldType.INPUT:
176-
return self.page.locator(
177-
f'//label[contains(text(), "{label}")]/../following-sibling::div[1]//input'
199+
return self._root.locator(
200+
f'{label_xpath}/../following-sibling::div[1]//input'
178201
)
179202
case FieldType.INPUT_DIRECT:
180-
return self.page.locator(
181-
f'//label[contains(text(), "{label}")]/../following-sibling::input[1]'
203+
return self._root.locator(
204+
f'{label_xpath}/../following-sibling::input[1]'
182205
)
183206
case FieldType.TEXTAREA:
184-
return self.page.locator(
185-
f'//label[contains(text(), "{label}")]/../following-sibling::textarea[1]'
207+
return self._root.locator(
208+
f'{label_xpath}/../following-sibling::textarea[1]'
186209
)
187210
case FieldType.SELECT:
188211
if label == "アクセス権":
189-
return self.page.locator(
190-
f'//label[text()="{label}"]/../following-sibling::select[1]'
212+
return self._root.locator(
213+
f'//label[normalize-space(text())="{label}"]/../following-sibling::select[1]'
191214
)
192-
return self.page.locator(
193-
f'//label[contains(text(), "{label}")]/../following-sibling::select[1]'
215+
return self._root.locator(
216+
f'{label_xpath}/../following-sibling::select[1]'
194217
)
195218
case FieldType.TABLE:
196-
return self.page.locator(
197-
f'//label[contains(text(), "{label}")]/../following-sibling::div[1]'
219+
return self._root.locator(
220+
f'{label_xpath}/../following-sibling::div[1]'
198221
)
199222
case _:
200223
raise ValueError(f"Unsupported field type: {field_type}")
@@ -271,3 +294,32 @@ async def get_table_cell(self, label: str, row_index: int, col_index: int) -> st
271294
row = locator.locator(f"table tbody tr:nth-of-type({row_index + 1})")
272295
cell_input = row.locator(f"td:nth-of-type({col_index + 1}) input")
273296
return await cell_input.input_value()
297+
298+
async def fill_author(self, author: Dict[str, Any]) -> None:
299+
"""Add an author row and fill all author fields."""
300+
container = self.get_locator("著者名")
301+
await self.click_table_add_row("著者名")
302+
303+
edit_rows = container.locator('.metadata-edit-mode')
304+
row_count = await edit_rows.count()
305+
if row_count == 0:
306+
raise AssertionError("No edit rows found for authors")
307+
panel = edit_rows.nth(row_count - 1)
308+
await panel.wait_for(state="visible")
309+
310+
await panel.locator('label:has-text("e-Rad 研究者番号") + div input').fill(author['number'])
311+
312+
ja_inputs = panel.locator('label:has-text("名前(日本語)") + div table input')
313+
await ja_inputs.nth(0).fill(author['name_ja']['last'])
314+
await ja_inputs.nth(1).fill(author['name_ja']['middle'])
315+
await ja_inputs.nth(2).fill(author['name_ja']['first'])
316+
317+
en_inputs = panel.locator('label:has-text("Name (English)") + div table input')
318+
await en_inputs.nth(0).fill(author['name_en']['last'])
319+
await en_inputs.nth(1).fill(author['name_en']['middle'])
320+
await en_inputs.nth(2).fill(author['name_en']['first'])
321+
322+
await panel.locator('label:has-text("所属機関名(日本語)") + div input').fill(author['affiliation_ja'])
323+
await panel.locator('label:has-text("所属機関名(英語)") + div input').fill(author['affiliation_en'])
324+
325+
await panel.locator('.hide-edit-row').click()

scripts/playwright.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
# ユーティリティ関数群
2+
import asyncio
23
from datetime import datetime
34
import os
45
import shutil
@@ -134,11 +135,20 @@ async def init_pw_context(close_on_fail=True, last_path=None, browser_type='chro
134135
console_messages = []
135136
return (current_session_id, temp_dir)
136137

137-
async def finish_pw_context(screenshot=False, last_path=None):
138+
async def finish_pw_context(screenshot=False, last_path=None, timeout=180):
138139
global current_browser
139-
await _finish_pw_context(screenshot=screenshot, last_path=last_path)
140+
try:
141+
await asyncio.wait_for(
142+
_finish_pw_context(screenshot=screenshot, last_path=last_path),
143+
timeout=timeout
144+
)
145+
except asyncio.TimeoutError:
146+
print(f'finish_pw_context timed out after {timeout} seconds', file=sys.stderr)
140147
if current_browser is not None:
141-
await current_browser.close()
148+
try:
149+
await asyncio.wait_for(current_browser.close(), timeout=30)
150+
except asyncio.TimeoutError:
151+
print('browser.close() timed out', file=sys.stderr)
142152
current_browser = None
143153

144154
async def save_screenshot(path):

0 commit comments

Comments
 (0)