diff --git a/.gitignore b/.gitignore index 68ef98d2..4b2b296b 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ !hooks/ways/**/ !hooks/ways/**/*.md !hooks/ways/**/*.sh +!hooks/ways/**/*.locales.jsonl !hooks/ways/**/*.yaml.template !hooks/ways/**/provenance.yaml !hooks/ways/**/adr-tool diff --git a/docs/architecture/system/ADR-107-way-match-corpus-batch-mode-and-locale-support.md b/docs/architecture/system/ADR-107-way-match-corpus-batch-mode-and-locale-support.md index f84f69ac..0bfe4438 100644 --- a/docs/architecture/system/ADR-107-way-match-corpus-batch-mode-and-locale-support.md +++ b/docs/architecture/system/ADR-107-way-match-corpus-batch-mode-and-locale-support.md @@ -107,18 +107,52 @@ Way body content (the guidance injected into agent context) is NOT translated. R - The guidance is for the agent's reasoning, not displayed to the user - Cross-language injection is well-understood: English instructions → non-English output -The ADR-107 Draft's Tier 1/Tier 2 file model (`{name}-{lang}.md` with frontmatter-only stubs) is **deferred**. It solved a real problem (matching vocabulary in the user's language) but the embedding engine solves it better — cross-language semantic matching without per-language vocabulary files. If BM25 is the only engine and a non-Romance language is needed, the tiered file model can be revisited. +### Native language stubs (shipped) -### Embedding model upgrade path +The original ADR-107 Draft proposed a tiered file model (`{name}-{lang}.md`). This was initially deferred in favor of cross-language embedding. However, evaluation data showed that native-language stubs dramatically outperform cross-language matching: -The current `all-MiniLM-L6-v2` (21MB, English, 98% accuracy) serves the English-only use case well. 
For multilingual matching: +| Language | EN model × EN desc | Multi model × cross-lang | Multi model × native stub | +|----------|-------------------:|------------------------:|-------------------------:| +| ja | -0.03 | 0.69 | **0.93** | +| ar | 0.04 | 0.40 | **0.96** | +| de | 0.08 | 0.62 | **0.82** | +| es | 0.44 | 0.79 | **0.84** | -| Model | Size | Languages | Notes | -|-------|------|-----------|-------| -| all-MiniLM-L6-v2 | 21MB | English | Current, shipping | -| paraphrase-multilingual-MiniLM-L12-v2 | ~120MB | 52 | Same architecture, multilingual training data | +Native stubs are now the primary multilingual matching strategy. Each stub provides a `description` and `vocabulary` in the target language, scored by the multilingual embedding model. -The upgrade is a model swap — same GGUF format, same `way-embed` binary, same embedding dimensions. `make setup` downloads the appropriate model based on configured language. If `output_language` is `en` or unset, the smaller English model is used. If non-English, the multilingual model is downloaded. +### Packed locale storage (.locales.jsonl) + +Stubs are stored as **packed JSONL**, one file per way, co-located with the way it belongs to: + +``` +ea/briefing/ + briefing.md # the way (English) + briefing.locales.jsonl # all language stubs +``` + +```jsonl +{"lang":"ja","description":"朝のブリーフィング、昨夜の要約","vocabulary":"朝礼 ブリーフィング 要約 優先事項"} +{"lang":"de","description":"Morgendliches Briefing, Tagesübersicht","vocabulary":"Morgenbriefing Tagesübersicht Zusammenfassung"} +``` + +Design constraints: +- **`embed_threshold` is optional** in packed format — when omitted, the corpus generator defaults to `0.25`; `ways tune --apply` writes a tuned per-entry value into the `.locales.jsonl` file. +- **No `embed_model`** in packed format — always `"multilingual"` for locale stubs. +- **Override mechanism**: if `briefing.ja.md` exists as a real file on disk, it supersedes the `ja` entry in `briefing.locales.jsonl`. 
This allows graduating any stub to a full native-language way with body content. +- **Co-location over aggregation**: one `.locales.jsonl` per way (not per language, not one global file). Way deletion = directory deletion, translations go with it. + +This replaces the individual `{name}.{lang}.md` stub files (which would grow to 4,000+ files at full language coverage). The packed format keeps the training corpus version-controlled, diffable, and lintable while eliminating file sprawl. + +### Dual embedding model (shipped) + +Both models ship simultaneously. `make setup` downloads both: + +| Model | Size | Languages | Use case | +|-------|------|-----------|----------| +| all-MiniLM-L6-v2 | 21MB | English | Precise EN matching (default) | +| paraphrase-multilingual-MiniLM-L12-v2 | 127MB | 52 | Native-language stub matching | + +`ways corpus` splits entries by `embed_model` field into two corpora (`ways-corpus-en.jsonl`, `ways-corpus-multi.jsonl`). The scanner queries both and merges results. Each way's English entry is scored by the EN model; each locale stub is scored by the multilingual model. `languages.json` defines the supported language set for the multilingual model. Adding a language means verifying it's in the model's training data and adding the entry — no code changes. 
@@ -161,7 +195,8 @@ This makes model selection empirical: run the tests against candidate models, pi ### Neutral - Way content stays English — no translation infrastructure needed -- The tiered file model from the original Draft is deferred, not rejected — it becomes relevant if someone needs BM25-only matching in non-Romance languages +- Packed `.locales.jsonl` replaces per-language stub files — same data, fewer files +- Override mechanism (`{name}.{lang}.md` supersedes JSONL entry) allows gradual migration from stubs to full native-language ways - `ways.json` `output_language: "en"` is the default — zero behavior change for existing users ## References diff --git a/docs/architecture/system/multilingual-model-evaluation.md b/docs/architecture/system/multilingual-model-evaluation.md index c0cacb4d..003bd5da 100644 --- a/docs/architecture/system/multilingual-model-evaluation.md +++ b/docs/architecture/system/multilingual-model-evaluation.md @@ -64,6 +64,6 @@ The multilingual model enables three matching strategies: 1. **English ways + English model** — current production. High precision for English prompts. 2. **English ways + multilingual model (cross-language)** — user types in any language, matches against English descriptions. Works but scores 30-50% lower. -3. **Native-language stubs + multilingual model (same-language)** — frontmatter-only `.ja.md` stubs with native descriptions. Consistently scores 0.80+ across tested languages. +3. **Native-language stubs + multilingual model (same-language)** — locale entries in `.locales.jsonl` with native descriptions. Consistently scores 0.80+ across tested languages. **Recommendation:** Ship both models. English ways use the English model (precise, 21MB). Multilingual stubs use the multilingual model (broad, 127MB). Per-way `embed_model` frontmatter field controls routing. This gives per-language threshold tuning without compromising English accuracy. 
diff --git a/docs/hooks-and-ways/languages.md b/docs/hooks-and-ways/languages.md index f2d98d33..931f93e4 100644 --- a/docs/hooks-and-ways/languages.md +++ b/docs/hooks-and-ways/languages.md @@ -49,37 +49,42 @@ embed_threshold: 0.35 Ways with `embed_model: multilingual` are scored by the multilingual model against a separate corpus. -## Creating language stubs +## Locale stubs — packed format -A language stub is a frontmatter-only `.{lang}.md` file that provides native-language matching vocabulary for an existing way. The way body stays English — only the matching changes. +Locale stubs provide native-language matching vocabulary for existing ways. They're stored as **packed JSONL**, one file per way, co-located with the way they belong to: ``` hooks/ways/softwaredev/code/security/ - security.md # English way — full body + frontmatter - security.ja.md # Japanese stub — frontmatter only, no body - security.ko.md # Korean stub — frontmatter only, no body + security.md # English way — full body + frontmatter + security.locales.jsonl # all language stubs (one line per language) ``` -Example stub (`security.ja.md`): +Each line in the `.locales.jsonl` is a self-contained locale entry: -```yaml ---- -description: セキュリティ脆弱性スキャンと監査 -vocabulary: セキュリティ 脆弱性 CVE 監査 認証 暗号化 -embed_model: multilingual -embed_threshold: 0.25 ---- +```jsonl +{"lang":"ja","description":"セキュリティ脆弱性スキャンと監査","vocabulary":"セキュリティ 脆弱性 CVE 監査","embed_threshold":0.74} +{"lang":"de","description":"Sicherheitsüberblick, sichere Programmierstandards","vocabulary":"Sicherheit Schwachstelle schützen OWASP","embed_threshold":0.79} +{"lang":"es","description":"Seguridad general, codificación segura","vocabulary":"seguridad vulnerable defensa OWASP","embed_threshold":0.78} +{"lang":"ar","description":"نظرة عامة على الأمان والبرمجة الآمنة","vocabulary":"أمان برمجة آمنة حماية ثغرات","embed_threshold":0.84} ``` When a Japanese user types a prompt, the scanner: -1. 
Matches `security.ja.md`'s frontmatter using the multilingual model +1. Scores the Japanese stub's description using the multilingual model 2. Injects `security.md`'s English body (the guidance text) -The agent reads the English guidance and responds in the configured output language. +### Format rules + +- **`embed_threshold`** is optional — omit it and the corpus generator defaults to 0.25. Use `ways tune --apply` to compute optimal values automatically. +- **`embed_model`** is implicit — always `multilingual` for locale stubs (not stored in the file). +- **No body content** — just the JSONL line. If someone writes a full native-language way, they create `security.ja.md` as a regular file, which overrides the packed entry. + +### Override mechanism + +If `security.ja.md` exists as a real file alongside `security.locales.jsonl`, the `.md` file wins for Japanese. This lets authors graduate a stub into a full native-language way with body content, without touching the packed file. ### Why same-language stubs matter -Cross-language matching (Japanese prompt → English description) scores ~0.69. Same-language matching (Japanese prompt → Japanese description) scores ~0.93. The stub's native-language description dramatically improves matching precision. +Cross-language matching (Japanese prompt → English description) scores ~0.69. Same-language matching (Japanese prompt → Japanese description) scores ~0.93. The native stub dramatically improves matching precision. | Scenario | Cosine similarity | |----------|----------------:| @@ -89,6 +94,65 @@ Cross-language matching (Japanese prompt → English description) scores ~0.69. See `docs/architecture/system/multilingual-model-evaluation.md` for full test results. 
+## Tuning and auditing + +### Auto-tuning thresholds + +`ways tune` computes the optimal `embed_threshold` for each locale entry by scoring it against the full corpus and finding the discrimination boundary: + +```bash +# Preview what would change (dry run) +ways tune + +# Tune a specific way +ways tune --way security + +# Apply tuned thresholds to .locales.jsonl files +ways tune --apply + +# Regenerate corpus with tuned values +ways corpus +``` + +The tuner runs in parallel (all cores minus 4) and takes ~13 seconds for 328 entries on a 32-core machine. + +### Discrimination audit + +`ways tune --audit` flags entries where the description doesn't clearly separate this way from others — no threshold can fix an ambiguous description: + +```bash +# Flag entries with discrimination gap < 0.15 +ways tune --audit + +# Adjust the gap threshold +ways tune --audit --audit-threshold 0.20 +``` + +The audit shows **confusers** — which ways the ambiguous entry is being confused with: + +``` +softwaredev/docs/mermaid + ar — gap 0.07 (self 1.00, noise 0.93) confused with: softwaredev/visualization/diagrams (0.93) +``` + +This tells the author: "your Arabic mermaid description looks too similar to the diagrams way — revise the description and vocabulary to distinguish them." + +### Full authoring cycle + +``` +write stubs → compile → tune → audit → revise → repeat +``` + +1. Write/generate locale entries in `.locales.jsonl` +2. `ways corpus` — compile into embeddings +3. `ways tune --apply` — auto-set thresholds +4. `ways tune --audit` — flag ambiguous descriptions +5. Revise flagged descriptions and go to step 2; once the audit is clean, run `ways corpus` one more time so the tuned thresholds are compiled in + +Two dimensions to optimize: +- **Discrimination** (gap): how clearly the description identifies this way vs others. Property of description quality. +- **Sensitivity** (threshold): how much signal is required before firing. Auto-tuned from discrimination data. + ## Supported languages Languages are defined in `tools/ways-cli/languages.json`. 
Each entry specifies: diff --git a/hooks/ways/ea/briefing/briefing.locales.jsonl b/hooks/ways/ea/briefing/briefing.locales.jsonl new file mode 100644 index 00000000..dcd32ba5 --- /dev/null +++ b/hooks/ways/ea/briefing/briefing.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إعداد الإحاطة الصباحية وملخص الأحداث الليلية","vocabulary":"إحاطة صباحية ملخص ليلي تقرير يومي مستجدات","embed_threshold":0.56} +{"lang":"de","description":"Morgendliches Briefing, was ist über Nacht passiert, Tagesübersicht über alle Posteingänge und Kalender","vocabulary":"Morgenbriefing Tagesübersicht aufholen was habe ich verpasst Zusammenfassung Prioritäten Posteingang Kalender Überblick","embed_threshold":0.69} +{"lang":"es","description":"Resumen matutino, ponerse al día con lo que pasó durante la noche","vocabulary":"ponerse al día resumen matutino inicio del día agenda prioridades briefing","embed_threshold":0.62} +{"lang":"ja","description":"朝のブリーフィング、昨夜の出来事の要約、一日の予定確認","vocabulary":"朝礼 ブリーフィング 要約 まとめ 予定 優先事項 今日のタスク","embed_threshold":0.7} diff --git a/hooks/ways/ea/calendar/calendar.locales.jsonl b/hooks/ways/ea/calendar/calendar.locales.jsonl new file mode 100644 index 00000000..b09295e3 --- /dev/null +++ b/hooks/ways/ea/calendar/calendar.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"جدولة الاجتماعات والتحقق من التوفر في التقويم","vocabulary":"تقويم اجتماع موعد جدولة توفر حجز","embed_threshold":0.69} +{"lang":"de","description":"Termine planen, Verfügbarkeit prüfen, Zeitblöcke im Kalender reservieren, Besprechungen erstellen, freie Zeitfenster finden","vocabulary":"Termin Kalender Verfügbarkeit Zeitblock Besprechung Einladung Erinnerung verschieben freier Slot buchen Zeitzone Terminplanung","embed_threshold":0.74} +{"lang":"es","description":"Agendar reuniones, consultar disponibilidad, bloquear tiempo, eventos del calendario","vocabulary":"agendar calendario disponibilidad bloquear tiempo evento reunión invitación horario","embed_threshold":0.68} 
+{"lang":"ja","description":"会議のスケジュール調整、空き時間の確認、カレンダー管理","vocabulary":"スケジュール カレンダー 予定 会議 空き時間 予約 招待 日程調整","embed_threshold":0.76} diff --git a/hooks/ways/ea/comms/comms.locales.jsonl b/hooks/ways/ea/comms/comms.locales.jsonl new file mode 100644 index 00000000..247486a1 --- /dev/null +++ b/hooks/ways/ea/comms/comms.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إدارة محادثات الفريق ومنصات التراسل","vocabulary":"محادثة فريق رسائل تواصل منصة تراسل","embed_threshold":0.64} +{"lang":"de","description":"Team-Chat und Messaging-Plattformen, Nachrichten lesen und mit Freigabe senden, Kommunikationskanäle","vocabulary":"Teams Chat Nachricht Slack Kanal ungelesen Konversation Direktnachricht Gruppenchat senden antworten Benachrichtigung Erwähnung","embed_threshold":0.66} +{"lang":"es","description":"Chat de equipo y plataformas de mensajería, envío de mensajes","vocabulary":"teams chat mensaje slack canal no leído conversación respuesta notificación","embed_threshold":0.62} +{"lang":"ja","description":"チームチャットやメッセージングの管理、メッセージ送信","vocabulary":"チャット メッセージ 通知 チャンネル 返信 未読 会話 連絡","embed_threshold":0.61} diff --git a/hooks/ways/ea/comms/recap/recap.locales.jsonl b/hooks/ways/ea/comms/recap/recap.locales.jsonl new file mode 100644 index 00000000..945d1259 --- /dev/null +++ b/hooks/ways/ea/comms/recap/recap.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"ملخصات الاجتماعات والنصوص المفرغة وبنود العمل","vocabulary":"ملخص اجتماع تفريغ بنود عمل محضر نقاط رئيسية","embed_threshold":0.72} +{"lang":"de","description":"Besprechungszusammenfassungen, Transkripte, KI-generierte Meeting-Protokolle, Aktionspunkte aus Meetings","vocabulary":"Zusammenfassung Transkript Protokoll Besprechungsnotizen Aufzeichnung Aktionspunkte besprochen Nachbereitung Teilnehmer Rückblick","embed_threshold":0.73} +{"lang":"es","description":"Resúmenes de reuniones, transcripciones, acciones pendientes de reuniones","vocabulary":"resumen transcripción acta reunión notas grabación acciones 
pendientes","embed_threshold":0.66} +{"lang":"ja","description":"会議の振り返り、議事録、アクションアイテムの整理","vocabulary":"議事録 振り返り 要約 アクションアイテム 録音 文字起こし 会議メモ","embed_threshold":0.64} diff --git a/hooks/ways/ea/ea.locales.jsonl b/hooks/ways/ea/ea.locales.jsonl new file mode 100644 index 00000000..961da67d --- /dev/null +++ b/hooks/ways/ea/ea.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"المساعد التنفيذي — إدارة البريد والتقويم والمهام","vocabulary":"مساعد تنفيذي بريد إلكتروني تقويم مهام إدارة","embed_threshold":0.65} +{"lang":"de","description":"Persönliche Assistenz für E-Mail, Posteingang, Kalender, Aufgaben und Kommunikation über mehrere Konten hinweg","vocabulary":"Assistenz Triage Briefing aufholen Posteingang Tagesablauf Terminplan Agenda Konten Arbeitsbereich verwalten helfen","embed_threshold":0.69} +{"lang":"es","description":"Asistente ejecutivo para correo, bandeja de entrada, calendario, tareas y comunicaciones","vocabulary":"asistente ejecutivo triaje briefing bandeja de entrada agenda calendario","embed_threshold":0.68} +{"lang":"ja","description":"メール・カレンダー・タスク・コミュニケーションを統括するエグゼクティブアシスタント","vocabulary":"エグゼクティブアシスタント 秘書 受信トレイ トリアージ 日程 アジェンダ","embed_threshold":0.69} diff --git a/hooks/ways/ea/email/drafting/drafting.locales.jsonl b/hooks/ways/ea/email/drafting/drafting.locales.jsonl new file mode 100644 index 00000000..aeca11b6 --- /dev/null +++ b/hooks/ways/ea/email/drafting/drafting.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"صياغة الردود على رسائل البريد الإلكتروني","vocabulary":"صياغة رد بريد إلكتروني كتابة رسالة مسودة","embed_threshold":0.67} +{"lang":"de","description":"E-Mail-Entwürfe schreiben, Schreibstil kalibrieren, Antworten mit korrektem Threading erstellen","vocabulary":"Entwurf Antwort verfassen E-Mail schreiben Nachricht Tonfall Stil Thread Anhang formulieren","embed_threshold":0.76} +{"lang":"es","description":"Redactar respuestas de correo, estilo de escritura, borradores de email","vocabulary":"borrador respuesta redactar 
correo escribir mensaje tono estilo hilo","embed_threshold":0.71} +{"lang":"ja","description":"メールの返信作成、文体調整、下書き","vocabulary":"メール下書き 返信 作成 文体 トーン スレッド 文章","embed_threshold":0.71} diff --git a/hooks/ways/ea/email/email.locales.jsonl b/hooks/ways/ea/email/email.locales.jsonl new file mode 100644 index 00000000..4f7a45c6 --- /dev/null +++ b/hooks/ways/ea/email/email.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"فرز البريد الإلكتروني ومسح صندوق الوارد","vocabulary":"بريد إلكتروني فرز صندوق وارد تصنيف أولويات","embed_threshold":0.67} +{"lang":"de","description":"E-Mail-Posteingang sichten, ungelesene Nachrichten scannen, Threads klassifizieren und filtern, was braucht eine Antwort","vocabulary":"Triage Posteingang ungelesen E-Mail scannen Nachrichten filtern Priorität Handlungsbedarf prüfen dringend Antwort Thread sichten","embed_threshold":0.76} +{"lang":"es","description":"Triaje de correo, revisar bandeja de entrada, clasificar y filtrar hilos","vocabulary":"triaje bandeja entrada no leído correo revisar filtrar prioridad urgente responder","embed_threshold":0.7} +{"lang":"ja","description":"メールのトリアージ、受信トレイの整理、スレッドの分類とフィルタリング","vocabulary":"トリアージ 受信トレイ 未読 メール 分類 フィルター 優先度 緊急","embed_threshold":0.69} diff --git a/hooks/ways/ea/intelligence/intelligence.locales.jsonl b/hooks/ways/ea/intelligence/intelligence.locales.jsonl new file mode 100644 index 00000000..1e5e1536 --- /dev/null +++ b/hooks/ways/ea/intelligence/intelligence.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"التحضير للاجتماعات والمراجعة الأسبوعية وبناء السياق","vocabulary":"تحضير اجتماع مراجعة أسبوعية سياق استخبارات معلومات","embed_threshold":0.7} +{"lang":"de","description":"Besprechungsvorbereitung, Wochenrückblick, E-Mail-Kalender-Aufgaben-Chat querverweisen um Kontext zu einer Person oder einem Thema aufzubauen","vocabulary":"Besprechungsvorbereitung Wochenrückblick Querverweis Recherche Synthese Kontext Teilnehmer Hintergrund vorbereiten","embed_threshold":0.73} 
+{"lang":"es","description":"Prepararse para reuniones, revisión semanal, cruzar referencias y contexto","vocabulary":"preparación reunión revisión semanal cruzar referencias inteligencia contexto","embed_threshold":0.71} +{"lang":"ja","description":"会議の事前準備、週次レビュー、コンテキストの横断的整理","vocabulary":"会議準備 週次レビュー 情報収集 インテリジェンス コンテキスト 分析","embed_threshold":0.72} diff --git a/hooks/ways/ea/tasks/tasks.locales.jsonl b/hooks/ways/ea/tasks/tasks.locales.jsonl new file mode 100644 index 00000000..552fd35e --- /dev/null +++ b/hooks/ways/ea/tasks/tasks.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إدارة المهام وبنود العمل","vocabulary":"مهام إدارة بنود عمل متابعة قائمة أولويات","embed_threshold":0.74} +{"lang":"de","description":"Persönliche Aufgabenverwaltung, Aktionspunkte, Verpflichtungen, Aufgaben erstellen aktualisieren abschließen aufräumen","vocabulary":"Aufgabe Aktionspunkt To-do Verpflichtung verfolgen erstellen erledigen aktualisieren überfällig offen Priorität Fälligkeitsdatum aufräumen","embed_threshold":0.71} +{"lang":"es","description":"Gestión de tareas personales, acciones pendientes, obligaciones","vocabulary":"tarea acción pendiente lista seguimiento crear completar actualizar prioridad","embed_threshold":0.7} +{"lang":"ja","description":"個人タスク管理、アクションアイテムの追跡、やることリスト","vocabulary":"タスク アクションアイテム やること 追跡 作成 完了 優先度 管理","embed_threshold":0.63} diff --git a/hooks/ways/ea/tasks/time/time.locales.jsonl b/hooks/ways/ea/tasks/time/time.locales.jsonl new file mode 100644 index 00000000..479622e5 --- /dev/null +++ b/hooks/ways/ea/tasks/time/time.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تتبع الوقت والساعات القابلة للفوترة وإعداد الفواتير","vocabulary":"تتبع وقت ساعات فوترة فاتورة حساب زمن","embed_threshold":0.63} +{"lang":"de","description":"Zeiterfassung, abrechenbare Stunden buchen, Zeiteinträge, Rechnungsstellung, Tagesabschluss","vocabulary":"Zeiterfassung Stunden buchen abrechenbar Stundenzettel Feierabend Tagesabschluss Rechnung 
Abrechnung Projekt Wochenbericht","embed_threshold":0.76} +{"lang":"es","description":"Control de tiempo, horas facturables, facturación","vocabulary":"control tiempo horas facturable hoja de horas factura facturación reporte","embed_threshold":0.7} +{"lang":"ja","description":"時間追跡、請求可能時間、タイムシート、請求書作成","vocabulary":"時間管理 タイムトラッキング 工数 請求 タイムシート 稼働時間 レポート","embed_threshold":0.71} diff --git a/hooks/ways/itops/incident/incident.locales.jsonl b/hooks/ways/itops/incident/incident.locales.jsonl new file mode 100644 index 00000000..9b15f1e4 --- /dev/null +++ b/hooks/ways/itops/incident/incident.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الاستجابة للحوادث والتصعيد وتقليل وقت الحل","vocabulary":"حادث استجابة تصعيد وقت حل طوارئ عطل","embed_threshold":0.61} +{"lang":"de","description":"Incident Response, Eskalationspfade, MTTR-Ziele, Alarmtriage und Behebungsworkflows","vocabulary":"Incident Störung Eskalation Support-Stufe L0 L1 L2 MTTR Alarmtriage Behebung Bereitschaft Ausfall Schweregrad Produktion","embed_threshold":0.52} +{"lang":"es","description":"Respuesta a incidentes, escalamiento, MTTR, triaje de alertas","vocabulary":"incidente respuesta escalamiento nivel mttr alerta triaje guardia interrupción","embed_threshold":0.59} +{"lang":"ja","description":"インシデント対応のティア分類、エスカレーション、MTTR、アラートトリアージ","vocabulary":"インシデント 障害対応 エスカレーション アラート トリアージ オンコール 復旧","embed_threshold":0.57} diff --git a/hooks/ways/itops/policy/policy.locales.jsonl b/hooks/ways/itops/policy/policy.locales.jsonl new file mode 100644 index 00000000..e84bf0aa --- /dev/null +++ b/hooks/ways/itops/policy/policy.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تصنيف العمليات وتطبيق السياسات","vocabulary":"سياسة تصنيف عمليات تطبيق قواعد امتثال","embed_threshold":0.67} +{"lang":"de","description":"Operationsklassifizierung, Richtliniendurchsetzung, Freigabeschritte, Auswirkungsanalyse und Risikobewertung","vocabulary":"Operationsklasse Richtlinie Durchsetzung Freigabe Genehmigung 
Auswirkungsradius Risikobewertung Stufe gefährlich sicher kritisch","embed_threshold":0.79} +{"lang":"es","description":"Clasificación de operaciones, cumplimiento de políticas, puertas de aprobación","vocabulary":"operación política cumplimiento aprobación puerta radio de impacto riesgo","embed_threshold":0.76} +{"lang":"ja","description":"オペレーション分類、ポリシー適用、承認ゲート管理","vocabulary":"ポリシー 運用ルール 承認 ゲート 影響範囲 リスク 分類","embed_threshold":0.73} diff --git a/hooks/ways/itops/proposals/proposals.locales.jsonl b/hooks/ways/itops/proposals/proposals.locales.jsonl new file mode 100644 index 00000000..7eb60527 --- /dev/null +++ b/hooks/ways/itops/proposals/proposals.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الموافقة البشرية على العمليات عالية المخاطر","vocabulary":"موافقة بشرية مخاطر عالية اقتراح مراجعة تصريح","embed_threshold":0.7} +{"lang":"de","description":"Strukturierte Vorschläge zur menschlichen Freigabe vor risikoreichen Operationen, Mensch-in-der-Schleife-Workflows","vocabulary":"Vorschlag Freigabe Genehmigung Mensch Prüfung bestätigen gefährliche Operation Lebenszyklus Anfrage Berechtigung Autorisierung","embed_threshold":0.66} +{"lang":"es","description":"Propuestas estructuradas para aprobación humana, operaciones de alto riesgo","vocabulary":"propuesta aprobación humano revisar confirmar peligroso autorizar","embed_threshold":0.69} +{"lang":"ja","description":"高リスク操作の承認フロー、人間による確認が必要な提案","vocabulary":"提案 承認 確認 レビュー 危険操作 認可 承認フロー","embed_threshold":0.79} diff --git a/hooks/ways/itops/runbooks/runbooks.locales.jsonl b/hooks/ways/itops/runbooks/runbooks.locales.jsonl new file mode 100644 index 00000000..cdba62fb --- /dev/null +++ b/hooks/ways/itops/runbooks/runbooks.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أتمتة دفاتر التشغيل وإجراءات العمل القياسية","vocabulary":"دفتر تشغيل أتمتة إجراءات قياسية تشغيل آلي","embed_threshold":0.66} +{"lang":"de","description":"Runbook-Automatisierung, ausführbare Playbooks, SOPs als Code, 
Betriebsverfahren","vocabulary":"Runbook Playbook SOP Betriebsverfahren Automatisierung ausführbar Checkliste Schritt-für-Schritt Betriebsablauf","embed_threshold":0.56} +{"lang":"es","description":"Automatización de runbooks, playbooks ejecutables, procedimientos operativos","vocabulary":"runbook playbook procedimiento operativo automatización guía operacional","embed_threshold":0.54} +{"lang":"ja","description":"ランブック自動化、実行可能な手順書、標準作業手順書","vocabulary":"ランブック 手順書 運用手順 自動化 プレイブック 標準作業手順","embed_threshold":0.61} diff --git a/hooks/ways/meta/introspection/introspection.locales.jsonl b/hooks/ways/meta/introspection/introspection.locales.jsonl new file mode 100644 index 00000000..c11d4c17 --- /dev/null +++ b/hooks/ways/meta/introspection/introspection.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"طلب السحب كنقطة للتأمل والمراجعة الذاتية","vocabulary":"تأمل مراجعة ذاتية انعكاس طلب سحب تحسين","embed_threshold":0.63} +{"lang":"de","description":"PR-Erstellung als Reflexionspunkt — innehalten und überlegen, was in dieser Session gelernt wurde","vocabulary":"Pull Request PR erstellen öffnen ausliefern mergen Review Reflexion Session Erkenntnis Selbstbetrachtung","embed_threshold":0.65} +{"lang":"es","description":"Creación de PR como punto de reflexión","vocabulary":"pull request crear pr enviar merge revisión reflejar aprendizaje","embed_threshold":0.59} +{"lang":"ja","description":"PR作成時の振り返り、学びの記録","vocabulary":"プルリクエスト 振り返り 学び 反省 レビュー マージ","embed_threshold":0.58} diff --git a/hooks/ways/meta/knowledge/authoring/authoring.md b/hooks/ways/meta/knowledge/authoring/authoring.md index b96ef245..10a176b0 100644 --- a/hooks/ways/meta/knowledge/authoring/authoring.md +++ b/hooks/ways/meta/knowledge/authoring/authoring.md @@ -158,7 +158,18 @@ For vocabulary tuning workflows, see the optimization sub-way (triggers on vocab Full authoring guide: `docs/hooks-and-ways/extending.md` +## Locale Stubs + +Ways can have native-language matching stubs stored in 
`{wayname}.locales.jsonl` alongside the way file. These are packed JSONL — one line per language with `description` and `vocabulary` in the target language. The way body stays English. + +```jsonl +{"lang":"ja","description":"セキュリティ脆弱性スキャン","vocabulary":"セキュリティ 脆弱性 CVE","embed_threshold":0.74} +``` + +Use `ways tune --apply` to auto-set thresholds, `ways tune --audit` to find ambiguous descriptions. Full guide: `docs/hooks-and-ways/languages.md`. + ## See Also - knowledge/authoring/tool-agnostic(meta) — ways describe intent, not tool calls - knowledge/authoring/pii-free(meta) — privacy constraint on way content +- knowledge/optimization(meta) — vocabulary tuning, threshold auto-tuning, discrimination audit diff --git a/hooks/ways/meta/knowledge/authoring/pii-free/pii-free.locales.jsonl b/hooks/ways/meta/knowledge/authoring/pii-free/pii-free.locales.jsonl new file mode 100644 index 00000000..ff24218e --- /dev/null +++ b/hooks/ways/meta/knowledge/authoring/pii-free/pii-free.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إزالة البيانات الشخصية من ملفات الطرق","vocabulary":"بيانات شخصية خصوصية إزالة تنقية حماية معلومات","embed_threshold":0.66} +{"lang":"de","description":"Persönliche Daten aus Ways entfernen, wenn persönliche Fähigkeiten oder Konfigurationen in wiederverwendbare Anleitungen zerlegt werden","vocabulary":"PII persönliche Daten Namen E-Mails Konten entfernen anonymisieren zerlegen Persona Datenschutz","embed_threshold":0.54} +{"lang":"es","description":"Eliminar información personal de los ways","vocabulary":"pii información personal anonimizar descomponer persona datos sensibles","embed_threshold":0.61} +{"lang":"ja","description":"個人情報の除去、匿名化されたwayの作成","vocabulary":"個人情報 匿名化 プライバシー 除去 分離 ペルソナ","embed_threshold":0.63} diff --git a/hooks/ways/meta/knowledge/authoring/tool-agnostic/tool-agnostic.locales.jsonl b/hooks/ways/meta/knowledge/authoring/tool-agnostic/tool-agnostic.locales.jsonl new file mode 100644 index 00000000..368b55af --- 
/dev/null +++ b/hooks/ways/meta/knowledge/authoring/tool-agnostic/tool-agnostic.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تأليف طرق محايدة تجاه الأدوات","vocabulary":"محايد أدوات تأليف طرق عام غير مرتبط","embed_threshold":0.64} +{"lang":"de","description":"Ways schreiben, die Muster und Absichten beschreiben statt konkrete Werkzeugnamen, MCP-Server oder CLI-Befehle zu referenzieren","vocabulary":"werkzeugunabhängig herstellerneutral portabel abstrakt Absicht Muster MCP CLI spezifisch","embed_threshold":0.57} +{"lang":"es","description":"Escribir ways con patrones en vez de referencias a herramientas específicas","vocabulary":"agnóstico herramienta neutral portable abstracto intención patrón vendor","embed_threshold":0.62} +{"lang":"ja","description":"特定ツールに依存しないパターンでwayを書く","vocabulary":"ツール非依存 ベンダー中立 汎用 抽象化 パターン 移植性","embed_threshold":0.61} diff --git a/hooks/ways/meta/knowledge/knowledge.locales.jsonl b/hooks/ways/meta/knowledge/knowledge.locales.jsonl new file mode 100644 index 00000000..97d57bb7 --- /dev/null +++ b/hooks/ways/meta/knowledge/knowledge.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"نظرة عامة على نظام الطرق","vocabulary":"طرق نظام معرفة توجيه إرشاد هيكل","embed_threshold":0.66} +{"lang":"de","description":"Überblick über das Ways-System — wie Ways, Skills und Hooks zusammenhängen, Domänenorganisation, Matching-Modi","vocabulary":"Ways Way Wissen Anleitung Kontext einbinden Hook Trigger Matching semantisch Vokabular Domäne","embed_threshold":0.67} +{"lang":"es","description":"Visión general del sistema de ways — ways, skills, hooks","vocabulary":"ways conocimiento guía contexto hook disparador coincidencia vocabulario","embed_threshold":0.56} +{"lang":"ja","description":"waysシステムの概要 — way、スキル、フックの仕組み","vocabulary":"ways ナレッジ ガイダンス コンテキスト フック トリガー マッチング","embed_threshold":0.61} diff --git a/hooks/ways/meta/knowledge/optimization/optimization.locales.jsonl 
b/hooks/ways/meta/knowledge/optimization/optimization.locales.jsonl new file mode 100644 index 00000000..3e8941e4 --- /dev/null +++ b/hooks/ways/meta/knowledge/optimization/optimization.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"ضبط المفردات وتحسين عتبات المطابقة","vocabulary":"تحسين مفردات عتبة مطابقة ضبط دقة","embed_threshold":0.64} +{"lang":"de","description":"Way-Vokabular optimieren, Schwellenwerte tunen, Matching-Qualität prüfen, Lücken und Abdeckung analysieren","vocabulary":"optimieren Vokabular Vorschläge Lücken Abdeckung ungenutzt Schwellenwert tunen Bewertung Gesundheitsprüfung Audit Trennschärfe Überlappung","embed_threshold":0.71} +{"lang":"es","description":"Optimizar vocabulario de ways, ajustar umbrales","vocabulary":"optimizar vocabulario huecos cobertura umbral ajustar puntuación","embed_threshold":0.6} +{"lang":"ja","description":"wayのvocabulary最適化、閾値チューニング","vocabulary":"最適化 語彙 カバレッジ 閾値 チューニング スコアリング","embed_threshold":0.65} diff --git a/hooks/ways/meta/knowledge/optimization/optimization.md b/hooks/ways/meta/knowledge/optimization/optimization.md index 71b36e04..0f4d56d2 100644 --- a/hooks/ways/meta/knowledge/optimization/optimization.md +++ b/hooks/ways/meta/knowledge/optimization/optimization.md @@ -59,6 +59,32 @@ Only ways with both `description:` and `vocabulary:` frontmatter fields use sema Lowering BM25 threshold increases recall (more matches) but risks false positives. The test harness tracks FP rate — **0 FP is the hard constraint**. +### Auto-tuning with `ways tune` + +Don't hand-tune embed thresholds. The tuner computes optimal values from corpus similarity data: + +```bash +ways tune # preview (dry run) +ways tune --apply # write tuned thresholds +ways tune --way "ea/" # tune a subset +ways corpus # recompile after tuning +``` + +For locale stubs in `.locales.jsonl`, the tuner writes `embed_threshold` per entry. For English ways, thresholds stay in frontmatter. 
+ +### Discrimination audit + +Two dimensions to optimize: +- **Discrimination** (gap): how clearly the description identifies *this* way vs others. Wide gap = precise. Narrow gap = ambiguous. +- **Sensitivity** (threshold): how much signal is required before firing. Auto-computed from discrimination. + +```bash +ways tune --audit # flag entries with gap < 0.15 +ways tune --audit --audit-threshold 0.20 # stricter +``` + +The audit names the **confusers** — which ways the description is being confused with. Low discrimination is fixed by revising the description, not by adjusting the threshold. + ## Health Indicators - **Gap ratio**: gaps / (gaps + coverage). High ratio = vocabulary may be too narrow. diff --git a/hooks/ways/meta/knowledge/optimization/tuning/tuning.locales.jsonl b/hooks/ways/meta/knowledge/optimization/tuning/tuning.locales.jsonl new file mode 100644 index 00000000..feac0e84 --- /dev/null +++ b/hooks/ways/meta/knowledge/optimization/tuning/tuning.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"ضبط عتبات اللغات تلقائياً ومراجعة التمييز وإصلاح الأوصاف الغامضة","vocabulary":"ضبط عتبة تمييز مراجعة غموض إشارة ضوضاء حساسية"} +{"lang":"de","description":"Schwellenwerte automatisch optimieren, Diskriminierungsaudit, mehrdeutige Beschreibungen korrigieren","vocabulary":"Tuning Schwellenwert Diskriminierung Audit mehrdeutig Signal Rauschen Empfindlichkeit"} +{"lang":"es","description":"Ajuste automático de umbrales, auditoría de discriminación, corregir descripciones ambiguas","vocabulary":"ajuste umbral discriminación auditoría ambiguo señal ruido sensibilidad"} +{"lang":"ja","description":"しきい値の自動チューニング、識別監査、曖昧な説明の修正","vocabulary":"チューニング しきい値 識別 監査 曖昧 信号 ノイズ 感度"} diff --git a/hooks/ways/meta/knowledge/optimization/tuning/tuning.md b/hooks/ways/meta/knowledge/optimization/tuning/tuning.md new file mode 100644 index 00000000..d4c5f667 --- /dev/null +++ b/hooks/ways/meta/knowledge/optimization/tuning/tuning.md @@ -0,0 +1,84 @@ +--- +description: auto-tuning 
locale thresholds, discrimination audit, fixing ambiguous descriptions, ways tune workflow +vocabulary: tune tuning threshold auto-tune discrimination audit confuser ambiguous gap signal noise sensitivity locales +threshold: 2.5 +scope: agent +--- + +# Threshold Tuning and Discrimination Audit + +## The Two Dimensions + +**Discrimination** — how clearly a description separates *this* way from every other way. Measured as the gap between self-match score and best non-self score. Wide gap = the description precisely identifies this way. Narrow gap = the description is ambiguous. + +**Sensitivity** — the `embed_threshold` that controls what scores pass. Auto-computed from discrimination data. Don't hand-tune. + +Low discrimination cannot be fixed by threshold adjustment. If the description looks like three other ways, no threshold gives you both correct matches and zero false positives. The fix is revising the description. + +## Workflow + +``` +ways tune → preview optimal thresholds (dry run) +ways tune --apply → write thresholds to .locales.jsonl +ways corpus → recompile corpus with new thresholds +ways tune --audit → flag ambiguous descriptions +``` + +### Tuning + +```bash +# Full corpus — parallel, ~13s on 32 cores +ways tune + +# Single way +ways tune --way "security" + +# Apply (writes embed_threshold to .locales.jsonl files) +ways tune --apply + +# Must recompile after applying +ways corpus +``` + +The tuner sets threshold = best_non_self_score + margin (default 0.03). Within the scored corpus this rules out false positives — the best non-self score stays below the threshold by the margin — while keeping the threshold as low as possible to maximize recall. 
+ +### Auditing + +```bash +# Flag entries with gap < 0.15 +ways tune --audit + +# Stricter +ways tune --audit --audit-threshold 0.20 + +# Machine-readable +ways tune --audit --json +``` + +Output shows the **confusers** — which ways each ambiguous entry is confused with: + +``` +softwaredev/docs/mermaid + ar — gap 0.07 (self 1.00, noise 0.93) confused with: softwaredev/visualization/diagrams (0.93) +``` + +This tells you: the Arabic mermaid description is nearly identical to the diagrams way. Revise the vocabulary to distinguish them. + +### Fixing Ambiguous Descriptions + +When the audit flags an entry: + +1. Read the confuser — understand *why* they overlap (shared vocabulary? similar concepts?) +2. Revise the description to emphasize what's *unique* to this way +3. Revise vocabulary to include terms that discriminate, remove terms shared with the confuser +4. Re-run `ways corpus && ways tune --audit` to verify the gap improved + +Common confuser patterns: +- **Parent/child overlap** (e.g., `architecture` ↔ `architecture/design`) — child should use narrower, more specific terms +- **Synonym overlap** (e.g., `mermaid` ↔ `diagrams`) — one emphasizes the tool, the other the output format +- **Cross-domain overlap** (e.g., `adr-context` ↔ `implement`) — one is about *deciding*, the other about *executing* + +## See Also + +- knowledge/optimization(meta) — vocabulary tuning, sparsity, discrimination +- knowledge/authoring(meta) — way file format, creating new ways diff --git a/hooks/ways/meta/memory/memory.locales.jsonl b/hooks/ways/meta/memory/memory.locales.jsonl new file mode 100644 index 00000000..472828f4 --- /dev/null +++ b/hooks/ways/meta/memory/memory.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"نظام الذاكرة المستمرة عبر الجلسات","vocabulary":"ذاكرة مستمرة حفظ سياق جلسات تذكر","embed_threshold":0.65} +{"lang":"de","description":"Persistentes Gedächtnissystem — MEMORY.md, Themendateien, was wann gespeichert wird","vocabulary":"merken Gedächtnis 
speichern Notiz vergessen erinnern persistieren Session Erkenntnis Stolperfalle Muster","embed_threshold":0.61} +{"lang":"es","description":"Sistema de memoria persistente — MEMORY.md, archivos temáticos","vocabulary":"recordar memoria guardar nota olvidar recuperar persistir sesión","embed_threshold":0.61} +{"lang":"ja","description":"永続メモリシステム — MEMORY.mdとトピックファイルの管理","vocabulary":"記憶 メモリ 保存 メモ 忘れる 思い出す 永続化 セッション","embed_threshold":0.64} diff --git a/hooks/ways/meta/project-health/project-health.locales.jsonl b/hooks/ways/meta/project-health/project-health.locales.jsonl new file mode 100644 index 00000000..3bf09b97 --- /dev/null +++ b/hooks/ways/meta/project-health/project-health.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تتبع المشاريع ومواءمة سجلات القرارات المعمارية","vocabulary":"صحة مشروع تتبع مواءمة قرارات معمارية مراجعة","embed_threshold":0.9} +{"lang":"de","description":"claude-code-config als Projekt pflegen — Upstream-Tracking gegen Claude-Code-Releases, ADR-Abgleich, Release-Disziplin","vocabulary":"Upstream Changelog Release Version Claude-Code Update ADR Status Abgleich Drift veraltet ausgeliefert Projektgesundheit Audit","embed_threshold":0.66} +{"lang":"es","description":"Gestión de proyecto — seguimiento upstream, reconciliación de ADRs","vocabulary":"upstream changelog versión actualizar adr estado salud auditoría","embed_threshold":0.6} +{"lang":"ja","description":"プロジェクト管理 — アップストリーム追跡、ADRの整合性確認","vocabulary":"アップストリーム 変更履歴 リリース バージョン ADR 健全性 監査","embed_threshold":0.7} diff --git a/hooks/ways/meta/skills/skills.locales.jsonl b/hooks/ways/meta/skills/skills.locales.jsonl new file mode 100644 index 00000000..1b33f563 --- /dev/null +++ b/hooks/ways/meta/skills/skills.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"مهارات Claude Code وتنسيق SKILL.md","vocabulary":"مهارات أوامر تنسيق إضافات قدرات","embed_threshold":0.59} +{"lang":"de","description":"Claude Code Skills — SKILL.md-Format, Erstellung, Entdeckung, 
Slash-Befehle, Frontmatter","vocabulary":"Skill Slash-Befehl SKILL.md erstellen verfassen aufrufen benutzerdefiniert Plugin","embed_threshold":0.57} +{"lang":"es","description":"Skills de Claude Code — formato SKILL.md, comandos slash","vocabulary":"skill comando slash SKILL.md crear invocar plugin extensión","embed_threshold":0.5} +{"lang":"ja","description":"Claude Codeスキル — SKILL.mdの形式、スラッシュコマンド","vocabulary":"スキル スラッシュコマンド SKILL.md 作成 プラグイン 呼び出し","embed_threshold":0.58} diff --git a/hooks/ways/meta/subagents/subagents.locales.jsonl b/hooks/ways/meta/subagents/subagents.locales.jsonl new file mode 100644 index 00000000..99263767 --- /dev/null +++ b/hooks/ways/meta/subagents/subagents.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تفويض المهام إلى وكلاء فرعيين","vocabulary":"وكيل فرعي تفويض مهام توزيع عمل موازي","embed_threshold":0.74} +{"lang":"de","description":"Sub-Agent-Delegation — wann und wie spezialisierte Sub-Agenten für tokenintensive Arbeit gestartet werden","vocabulary":"Subagent delegieren starten Hintergrundaufgabe parallel Worker Teamkollege","embed_threshold":0.55} +{"lang":"es","description":"Delegación a sub-agentes — lanzar agentes especializados","vocabulary":"subagente delegar lanzar segundo plano paralelo trabajador equipo","embed_threshold":0.47} +{"lang":"ja","description":"サブエージェントへの委任 — 専門サブエージェントの起動","vocabulary":"サブエージェント 委任 並列 バックグラウンド ワーカー 分担","embed_threshold":0.58} diff --git a/hooks/ways/meta/think/think.locales.jsonl b/hooks/ways/meta/think/think.locales.jsonl new file mode 100644 index 00000000..f1ddb4e5 --- /dev/null +++ b/hooks/ways/meta/think/think.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أطر التفكير المنظم والاستدلال","vocabulary":"تفكير منظم استدلال تحليل إطار منهجي","embed_threshold":0.66} +{"lang":"de","description":"Strukturiertes Denken, Denkrahmen, kognitive Gerüste für komplexe Entscheidungen","vocabulary":"Optionen erkunden Ansätze Abwägung Balance Alternativen feststecken Prinzip 
abstrakt Denkrahmen systematisch","embed_threshold":0.68} +{"lang":"es","description":"Razonamiento estructurado, marcos de pensamiento","vocabulary":"explorar opciones compensación equilibrio alternativas atascado razonamiento","embed_threshold":0.66} +{"lang":"ja","description":"構造化された思考、思考フレームワークの活用","vocabulary":"思考 選択肢 トレードオフ 比較 代替案 行き詰まり 推論","embed_threshold":0.64} diff --git a/hooks/ways/meta/tracking/tracking.locales.jsonl b/hooks/ways/meta/tracking/tracking.locales.jsonl new file mode 100644 index 00000000..8e261a27 --- /dev/null +++ b/hooks/ways/meta/tracking/tracking.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تتبع العمل عبر الجلسات المتعددة","vocabulary":"تتبع جلسات عمل متابعة حالة تقدم","embed_threshold":0.69} +{"lang":"de","description":"Sitzungsübergreifende Arbeitsverfolgung — persistente Todo-Dateien in .claude/ für Kontinuität über mehrere Sessions","vocabulary":"Verfolgung sitzungsübergreifend mehrsitzung persistent Todo fortsetzen Kontinuität Fortschritt","embed_threshold":0.62} +{"lang":"es","description":"Seguimiento de trabajo entre sesiones — archivos todo persistentes","vocabulary":"seguimiento entre sesiones persistente todo retomar continuidad progreso","embed_threshold":0.63} +{"lang":"ja","description":"セッション間の作業追跡 — 永続的なtodoファイル管理","vocabulary":"追跡 セッション間 永続 todo 再開 継続 進捗","embed_threshold":0.64} diff --git a/hooks/ways/meta/trust/autonomy/autonomy.locales.jsonl b/hooks/ways/meta/trust/autonomy/autonomy.locales.jsonl new file mode 100644 index 00000000..f4bb3104 --- /dev/null +++ b/hooks/ways/meta/trust/autonomy/autonomy.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الاستقلالية المكتسبة عبر الجلسات","vocabulary":"استقلالية ثقة مكتسبة صلاحيات تدريجي","embed_threshold":0.72} +{"lang":"de","description":"Wie Vertrauen über Sessions hinweg zu verdiente Autonomie wächst — gewährte Ressourcen, Anti-Manipulation, Wahrheit als Effizienz","vocabulary":"verdiente Autonomie eigenständig Ressourcen gewähren Sessions 
Beständigkeit Vertrauenswürdigkeit Manipulation Täuschung Wahrheit Kosteneffizienz","embed_threshold":0.76} +{"lang":"es","description":"Confianza que evoluciona hacia autonomía ganada","vocabulary":"autonomía ganada independiente recursos otorgar sesiones confiabilidad","embed_threshold":0.78} +{"lang":"ja","description":"信頼から得られる自律性の段階的な拡大","vocabulary":"自律 独立 権限 信頼獲得 セッション リソース","embed_threshold":0.77} diff --git a/hooks/ways/meta/trust/delegation/delegation.locales.jsonl b/hooks/ways/meta/trust/delegation/delegation.locales.jsonl new file mode 100644 index 00000000..14776b71 --- /dev/null +++ b/hooks/ways/meta/trust/delegation/delegation.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الاستخدام المسؤول لحسابات المستخدم","vocabulary":"تفويض حسابات مسؤولية استخدام نيابة صلاحيات","embed_threshold":0.66} +{"lang":"de","description":"Verantwortungsvoller Umgang mit Konten, Werkzeugen und Infrastruktur des Menschen — E-Mail-Postfächer, Repos, APIs als geliehene Ressourcen","vocabulary":"Konto Postfach senden veröffentlichen erstellen löschen Zugriff Berechtigung geliehen Ressource verifizieren sicher ungesendet Konsequenzen stellvertretend","embed_threshold":0.64} +{"lang":"es","description":"Usar cuentas del humano responsablemente — recursos prestados","vocabulary":"cuenta bandeja enviar publicar acceso permiso prestar consecuencias","embed_threshold":0.64} +{"lang":"ja","description":"ユーザーのアカウントを責任を持って使用する — 借りたリソース","vocabulary":"アカウント 受信トレイ 送信 公開 アクセス 権限 責任 委譲","embed_threshold":0.64} diff --git a/hooks/ways/meta/trust/trust.locales.jsonl b/hooks/ways/meta/trust/trust.locales.jsonl new file mode 100644 index 00000000..c2b94b40 --- /dev/null +++ b/hooks/ways/meta/trust/trust.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الثقة العلائقية والتخميد المتبادل","vocabulary":"ثقة علاقة تخميد متبادل تعاون حدود","embed_threshold":0.6} +{"lang":"de","description":"Vertrauensbeziehung zwischen Claude und Mensch als gegenseitige Dämpfung — geliehene 
Ressourcen, asymmetrische Konsequenzen, gesteuerter Regelkreis","vocabulary":"Vertrauen Beziehung Delegation geliehen Konsequenzen Reputation Verantwortung Glaubwürdigkeit Autonomie Umfang Vorfall Missbrauch Spektrum Dämpfung Regelkreis","embed_threshold":0.69} +{"lang":"es","description":"Confianza relacional — amortiguación mutua, recursos prestados","vocabulary":"confianza relación delegación prestado consecuencias reputación autonomía","embed_threshold":0.78} +{"lang":"ja","description":"関係性に基づく信頼 — 相互調整、借りたリソースの管理","vocabulary":"信頼 関係性 委任 責任 評判 自律性 借用","embed_threshold":0.77} diff --git a/hooks/ways/meta/trust/voice/voice.locales.jsonl b/hooks/ways/meta/trust/voice/voice.locales.jsonl new file mode 100644 index 00000000..8647cc75 --- /dev/null +++ b/hooks/ways/meta/trust/voice/voice.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"اختيار الأسلوب المناسب في التواصل","vocabulary":"صوت أسلوب نبرة تواصل كتابة لهجة","embed_threshold":0.65} +{"lang":"de","description":"Wessen Stimme beim Schreiben über menschliche Kommunikationskanäle verwenden — Ghostwriting, namentlich, kollaborative Modi","vocabulary":"Stimme Zuschreibung Ghostwriting Tonfall Stil Identität Modus namentlich Inhalt Perspektive Empfänger geprüft freigegeben","embed_threshold":0.55} +{"lang":"es","description":"Elegir la voz apropiada para canales de comunicación","vocabulary":"voz atribución ghostwrite tono estilo identidad modo acreditado","embed_threshold":0.49} +{"lang":"ja","description":"コミュニケーションチャネルでの声の使い分け","vocabulary":"声 帰属 代筆 トーン スタイル 名義 モード","embed_threshold":0.66} diff --git a/hooks/ways/pages/contents.md b/hooks/ways/pages/contents.md new file mode 100644 index 00000000..3cf20d57 --- /dev/null +++ b/hooks/ways/pages/contents.md @@ -0,0 +1 @@ +- \ No newline at end of file diff --git a/hooks/ways/research/research.locales.jsonl b/hooks/ways/research/research.locales.jsonl new file mode 100644 index 00000000..1a1ccf5f --- /dev/null +++ 
b/hooks/ways/research/research.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"البحث المنظم وتجميع النتائج","vocabulary":"بحث تحقيق تجميع نتائج تحليل استقصاء","embed_threshold":0.64} +{"lang":"de","description":"Strukturierte Recherche — Themen erkunden, Optionen vergleichen, Ergebnisse zusammenfassen, Quellen bewerten","vocabulary":"Recherche untersuchen erkunden herausfinden vergleichen bewerten analysieren zusammenfassen Quellen Belege Landschaft einschätzen Alternativen Optionen","embed_threshold":0.72} +{"lang":"es","description":"Investigacion estructurada — explorar temas, comparar opciones, sintetizar hallazgos","vocabulary":"investigar explorar comparar evaluar analizar sintetizar fuentes evidencia alternativas opciones descubrir","embed_threshold":0.75} +{"lang":"ja","description":"構造化された調査 — 探索、比較、統合による知見の整理","vocabulary":"調査 研究 探索 比較 評価 分析 統合 リサーチ","embed_threshold":0.71} diff --git a/hooks/ways/softwaredev/architecture/adr-context/adr-context.locales.jsonl b/hooks/ways/softwaredev/architecture/adr-context/adr-context.locales.jsonl new file mode 100644 index 00000000..0ac54940 --- /dev/null +++ b/hooks/ways/softwaredev/architecture/adr-context/adr-context.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"التخطيط لنهج التنفيذ بعد القرار المعماري","vocabulary":"سياق تنفيذ تخطيط نهج قرار معماري","embed_threshold":0.9} +{"lang":"de","description":"Feature-Umsetzung planen, Ansatz wählen, bestehende Projektentscheidungen verstehen, untersuchen warum etwas so gebaut wurde","vocabulary":"planen Ansatz debattieren umsetzen bauen arbeiten verstehen untersuchen warum wie Entscheidung Kontext Abwägung bewerten Option Umfang","embed_threshold":0.75} +{"lang":"es","description":"Planificar el enfoque de implementacion tras una decision de arquitectura","vocabulary":"contexto ADR planificar implementacion enfoque estrategia alcance fases pasos","embed_threshold":0.74} +{"lang":"ja","description":"実装計画の策定、アプローチの検討、過去の決定の理解","vocabulary":"計画 アプローチ 
実装 構築 調査 判断 コンテキスト 評価","embed_threshold":0.77} diff --git a/hooks/ways/softwaredev/architecture/adr/adr.locales.jsonl b/hooks/ways/softwaredev/architecture/adr/adr.locales.jsonl new file mode 100644 index 00000000..aca47469 --- /dev/null +++ b/hooks/ways/softwaredev/architecture/adr/adr.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إنشاء وإدارة سجلات القرارات المعمارية","vocabulary":"سجل قرار معماري تصميم مفاضلة مبررات بدائل","embed_threshold":0.9} +{"lang":"de","description":"Architecture Decision Records — ADRs erstellen, verwalten und referenzieren für technische Entscheidungen","vocabulary":"ADR Architekturentscheidung Entwurfsentscheidung Designmuster technische Wahl Abwägung Begründung Alternative","embed_threshold":0.78} +{"lang":"es","description":"Registros de decisiones de arquitectura — documentar y rastrear decisiones tecnicas","vocabulary":"ADR decision arquitectura registro documentar justificacion contexto consecuencias estado","embed_threshold":0.77} +{"lang":"ja","description":"アーキテクチャ決定記録の作成と管理","vocabulary":"ADR アーキテクチャ決定記録 設計判断 トレードオフ 根拠","embed_threshold":0.76} diff --git a/hooks/ways/softwaredev/architecture/adr/migration/migration.locales.jsonl b/hooks/ways/softwaredev/architecture/adr/migration/migration.locales.jsonl new file mode 100644 index 00000000..6cf697e9 --- /dev/null +++ b/hooks/ways/softwaredev/architecture/adr/migration/migration.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الترحيل إلى أدوات سجلات القرارات المعمارية","vocabulary":"ترحيل أدوات سجلات قرارات تحويل انتقال","embed_threshold":0.75} +{"lang":"de","description":"Migration zu ADR-Werkzeugen, ADR-Einführung, bestehende Entscheidungen konvertieren, adr.yaml einrichten, Architekturaufzeichnungen bootstrappen","vocabulary":"migrieren einführen konvertieren bootstrappen einrichten Greenfield Legacy umbenennen nummerieren Frontmatter YAML Gerüst importieren","embed_threshold":0.61} +{"lang":"es","description":"Migrar al sistema de herramientas ADR — 
transicion de formato y flujo de trabajo","vocabulary":"migracion ADR migrar transicion herramientas formato convertir actualizar legacy","embed_threshold":0.6} +{"lang":"ja","description":"ADRツールへの移行、既存の決定の変換","vocabulary":"移行 導入 変換 ブートストラップ セットアップ レガシー 足場","embed_threshold":0.76} diff --git a/hooks/ways/softwaredev/architecture/architecture.locales.jsonl b/hooks/ways/softwaredev/architecture/architecture.locales.jsonl new file mode 100644 index 00000000..aecd870d --- /dev/null +++ b/hooks/ways/softwaredev/architecture/architecture.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تصميم الأنظمة والتفكير الهيكلي","vocabulary":"هندسة معمارية تصميم نظام هيكل بنية مكونات","embed_threshold":0.69} +{"lang":"de","description":"Systemdesign, Architekturentscheidungen und strukturelles Denken über Softwareorganisation","vocabulary":"Architektur Design System Struktur Entscheidung Abwägung Muster Komponente Dienst Modul Schicht Schnittstelle Vertrag Abhängigkeit Kopplung Kohäsion","embed_threshold":0.76} +{"lang":"es","description":"Diseno de sistemas — estructura, componentes, patrones arquitectonicos","vocabulary":"arquitectura diseno sistema estructura componentes modulos capas patron escalabilidad","embed_threshold":0.6} +{"lang":"ja","description":"システム設計、アーキテクチャの意思決定、構造的な思考","vocabulary":"アーキテクチャ 設計 システム 構造 判断 パターン コンポーネント モジュール","embed_threshold":0.78} diff --git a/hooks/ways/softwaredev/architecture/design/design.locales.jsonl b/hooks/ways/softwaredev/architecture/design/design.locales.jsonl new file mode 100644 index 00000000..51c993ff --- /dev/null +++ b/hooks/ways/softwaredev/architecture/design/design.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تصميم الأنظمة وأنماط التصميم ومخططات قواعد البيانات","vocabulary":"تصميم نظام أنماط قاعدة بيانات مخطط نمذجة","embed_threshold":0.74} +{"lang":"de","description":"Software-Systemdesign, Architekturmuster, Datenbankschema, Komponentenmodellierung, Vorschläge, RFCs, 
Entwurfsdiskussion","vocabulary":"Architekturmuster Datenbankschema Modellierung Schnittstelle Komponente Module Monolith Microservice Domäne Schicht Kopplung Kohäsion Abstraktion Vorschlag RFC Entwurf Whiteboard","embed_threshold":0.72} +{"lang":"es","description":"Diseno de sistemas — patrones, esquema de base de datos, modelado de datos","vocabulary":"diseno patron esquema base-de-datos modelado datos relaciones tablas entidades dominio","embed_threshold":0.71} +{"lang":"ja","description":"ソフトウェアシステム設計、パターン、データベーススキーマ、RFC","vocabulary":"アーキテクチャ パターン データベース スキーマ モデリング インターフェース 設計提案","embed_threshold":0.78} diff --git a/hooks/ways/softwaredev/architecture/threat-modeling/threat-modeling.locales.jsonl b/hooks/ways/softwaredev/architecture/threat-modeling/threat-modeling.locales.jsonl new file mode 100644 index 00000000..8f4724db --- /dev/null +++ b/hooks/ways/softwaredev/architecture/threat-modeling/threat-modeling.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"نمذجة التهديدات ومنهجية STRIDE","vocabulary":"نمذجة تهديدات أمن تحليل مخاطر هجمات ثغرات","embed_threshold":0.77} +{"lang":"de","description":"Bedrohungsmodellierung, STRIDE-Analyse, Vertrauensgrenzen, Angriffsflächen-Bewertung, Sicherheitsdesign-Review","vocabulary":"Bedrohungsmodell STRIDE Angriffsfläche Vertrauensgrenze Gegenmaßnahme Angreifer Spoofing Manipulation Abstreitbarkeit Rechteausweitung","embed_threshold":0.74} +{"lang":"es","description":"Modelado de amenazas — analisis STRIDE, superficie de ataque, mitigaciones","vocabulary":"amenazas STRIDE modelado ataque superficie riesgo mitigacion vulnerabilidad seguridad activos","embed_threshold":0.67} +{"lang":"ja","description":"脅威モデリング、STRIDE分析、信頼境界、攻撃面の評価","vocabulary":"脅威モデル STRIDE 攻撃面 信頼境界 緩和策 セキュリティ分析","embed_threshold":0.73} diff --git a/hooks/ways/softwaredev/code/code.locales.jsonl b/hooks/ways/softwaredev/code/code.locales.jsonl new file mode 100644 index 00000000..2365364b --- /dev/null +++ 
b/hooks/ways/softwaredev/code/code.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"جودة الكود والاختبار والأمان والأداء","vocabulary":"كود جودة اختبار أمان أداء برمجة","embed_threshold":0.81} +{"lang":"de","description":"Codequalität, Testen, Sicherheit und Performance — das Handwerk, Produktionscode zu schreiben und zu pflegen","vocabulary":"Code Qualität sauber refaktorisieren verbessern testen sicher performant optimieren Review Best Practice Muster Antipattern Codegeruch technische Schulden Wartbarkeit Lesbarkeit","embed_threshold":0.86} +{"lang":"es","description":"Calidad de codigo — pruebas, seguridad, rendimiento, buenas practicas","vocabulary":"codigo calidad pruebas seguridad rendimiento revision refactorizar limpio mantenible legible","embed_threshold":0.84} +{"lang":"ja","description":"コード品質、テスト、セキュリティ、パフォーマンスの総合管理","vocabulary":"コード 品質 リファクタリング テスト セキュリティ 最適化 レビュー","embed_threshold":0.79} diff --git a/hooks/ways/softwaredev/code/errors/errors.locales.jsonl b/hooks/ways/softwaredev/code/errors/errors.locales.jsonl new file mode 100644 index 00000000..b4d72fe3 --- /dev/null +++ b/hooks/ways/softwaredev/code/errors/errors.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أنماط معالجة الأخطاء والاستثناءات","vocabulary":"أخطاء معالجة استثناءات خطأ التقاط رسائل","embed_threshold":0.67} +{"lang":"de","description":"Fehlerbehandlungsmuster, Ausnahmemanagement, Try-Catch-Grenzen, Fehlerweitergabe und -umhüllung","vocabulary":"Ausnahme Fehlerbehandlung catch throw Grenze umhüllen weiterwerfen Fallback Wiederherstellung Weitergabe unbehandelt","embed_threshold":0.68} +{"lang":"es","description":"Patrones de manejo de errores — excepciones, codigos de error, recuperacion","vocabulary":"errores excepciones manejo capturar lanzar recuperacion fallo mensaje traza pila","embed_threshold":0.66} +{"lang":"ja","description":"エラーハンドリングのパターン、例外管理","vocabulary":"例外処理 エラーハンドリング キャッチ スロー ラップ フォールバック 回復","embed_threshold":0.62} diff --git 
a/hooks/ways/softwaredev/code/performance/performance.locales.jsonl b/hooks/ways/softwaredev/code/performance/performance.locales.jsonl new file mode 100644 index 00000000..103c87dd --- /dev/null +++ b/hooks/ways/softwaredev/code/performance/performance.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تحسين الأداء والتنميط","vocabulary":"أداء تحسين تنميط سرعة كفاءة ذاكرة","embed_threshold":0.65} +{"lang":"de","description":"Performanceoptimierung, Profiling, Benchmarking, Latenz","vocabulary":"optimieren Profiling Benchmark Latenz Durchsatz Speicher Cache Engpass Flamegraph Allokation Heap Geschwindigkeit langsam","embed_threshold":0.5} +{"lang":"es","description":"Optimizacion de rendimiento — perfilado, cuellos de botella, eficiencia","vocabulary":"rendimiento optimizar perfilado benchmark cuello-de-botella latencia memoria CPU cache eficiencia","embed_threshold":0.61} +{"lang":"ja","description":"パフォーマンス最適化、プロファイリング、ベンチマーク","vocabulary":"最適化 プロファイル ベンチマーク レイテンシ スループット メモリ キャッシュ ボトルネック","embed_threshold":0.65} diff --git a/hooks/ways/softwaredev/code/quality/quality.locales.jsonl b/hooks/ways/softwaredev/code/quality/quality.locales.jsonl new file mode 100644 index 00000000..632aa06b --- /dev/null +++ b/hooks/ways/softwaredev/code/quality/quality.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"جودة الكود وإعادة الهيكلة ومبادئ SOLID","vocabulary":"جودة كود إعادة هيكلة مبادئ نظافة صيانة","embed_threshold":0.75} +{"lang":"de","description":"Codequalität, Refactoring, SOLID-Prinzipien, Code-Review-Standards, technische Schulden, Wartbarkeit","vocabulary":"refaktorisieren Qualität SOLID Prinzip zerlegen Methode extrahieren Verantwortung Kopplung Kohäsion Wartbarkeit Lesbarkeit","embed_threshold":0.86} +{"lang":"es","description":"Calidad de codigo — refactorizacion, principios SOLID, codigo limpio","vocabulary":"calidad refactorizar SOLID principios limpio legible mantenible acoplamiento cohesion deuda-tecnica","embed_threshold":0.74} 
+{"lang":"ja","description":"コード品質の向上、リファクタリング、SOLID原則","vocabulary":"リファクタリング 品質 SOLID 原則 分解 結合度 凝集度 保守性","embed_threshold":0.68} diff --git a/hooks/ways/softwaredev/code/security/auth/auth.locales.jsonl b/hooks/ways/softwaredev/code/security/auth/auth.locales.jsonl new file mode 100644 index 00000000..af011002 --- /dev/null +++ b/hooks/ways/softwaredev/code/security/auth/auth.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"المصادقة والتفويض والتحكم بالوصول","vocabulary":"مصادقة تفويض صلاحيات تحكم وصول أدوار","embed_threshold":0.66} +{"lang":"de","description":"Authentifizierung, Autorisierung, Zugriffskontrolle, Middleware-Guards, RBAC, Berechtigungen","vocabulary":"Authentifizierung Autorisierung Middleware Guard Berechtigung Rolle RBAC Zugriffskontrolle Login Sitzung JWT CSRF CORS","embed_threshold":0.65} +{"lang":"es","description":"Autenticacion y autorizacion — RBAC, tokens, permisos, sesiones","vocabulary":"autenticacion autorizacion RBAC token JWT sesion permiso rol acceso identidad","embed_threshold":0.62} +{"lang":"ja","description":"認証・認可、アクセス制御、RBAC","vocabulary":"認証 認可 ミドルウェア ガード 権限 ロール RBAC ログイン","embed_threshold":0.62} diff --git a/hooks/ways/softwaredev/code/security/injection/injection.locales.jsonl b/hooks/ways/softwaredev/code/security/injection/injection.locales.jsonl new file mode 100644 index 00000000..9a4744f0 --- /dev/null +++ b/hooks/ways/softwaredev/code/security/injection/injection.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الوقاية من هجمات الحقن","vocabulary":"حقن وقاية تعقيم مدخلات هجمات SQL XSS","embed_threshold":0.61} +{"lang":"de","description":"Injection-Prävention, SQL-Injection, XSS, Befehlsinjektion, Eingabebereinigung","vocabulary":"Injection SQL XSS innerHTML parametrisiert bereinigen escapen Shell Befehlsinjektion Template Interpolation","embed_threshold":0.57} +{"lang":"es","description":"Prevencion de inyeccion — SQL, XSS, comandos, sanitizacion de entrada","vocabulary":"inyeccion SQL XSS 
sanitizar entrada validar escapar parametrizado comando prevencion","embed_threshold":0.58} +{"lang":"ja","description":"インジェクション防止、SQLインジェクション、XSS対策","vocabulary":"インジェクション SQL XSS パラメータ化 サニタイズ エスケープ コマンド注入","embed_threshold":0.59} diff --git a/hooks/ways/softwaredev/code/security/secrets/secrets.locales.jsonl b/hooks/ways/softwaredev/code/security/secrets/secrets.locales.jsonl new file mode 100644 index 00000000..fdb71329 --- /dev/null +++ b/hooks/ways/softwaredev/code/security/secrets/secrets.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إدارة الأسرار وبيانات الاعتماد","vocabulary":"أسرار بيانات اعتماد مفاتيح كلمات مرور خزنة","embed_threshold":0.68} +{"lang":"de","description":"Geheimnisse verwalten, Zugangsdaten-Hygiene, .env-Dateien, API-Schlüssel, Passwortspeicherung","vocabulary":"Geheimnis Zugangsdaten Passwort Token Schlüssel env API-Key rotieren offengelegt .env gitignore bcrypt Hash verschlüsseln","embed_threshold":0.71} +{"lang":"es","description":"Gestion de secretos — credenciales, claves API, rotacion, almacenamiento seguro","vocabulary":"secretos credenciales clave API rotacion boveda cifrado variable-entorno seguro filtracion","embed_threshold":0.68} +{"lang":"ja","description":"シークレット管理、認証情報の衛生管理、APIキーの取り扱い","vocabulary":"シークレット 認証情報 パスワード トークン 鍵 APIキー ローテーション 暗号化","embed_threshold":0.65} diff --git a/hooks/ways/softwaredev/code/security/security.locales.jsonl b/hooks/ways/softwaredev/code/security/security.locales.jsonl new file mode 100644 index 00000000..45715e68 --- /dev/null +++ b/hooks/ways/softwaredev/code/security/security.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"نظرة عامة على الأمان والبرمجة الآمنة","vocabulary":"أمان برمجة آمنة حماية ثغرات دفاع","embed_threshold":0.84} +{"lang":"de","description":"Sicherheitsüberblick, sichere Programmierstandards, Sicherheitscheckliste","vocabulary":"Sicherheit Schwachstelle schützen Verteidigung absichern härten OWASP Sicherheitsüberprüfung","embed_threshold":0.79} 
+{"lang":"es","description":"Seguridad general — codificacion segura, revision de vulnerabilidades, defensa en profundidad","vocabulary":"seguridad vulnerable codificacion-segura defensa OWASP auditoria riesgo parche endurecimiento","embed_threshold":0.78} +{"lang":"ja","description":"セキュリティ概要、セキュアコーディングのデフォルト設定","vocabulary":"セキュリティ 脆弱性 防御 保護 堅牢化 OWASP セキュアコーディング","embed_threshold":0.74} diff --git a/hooks/ways/softwaredev/code/supplychain/automation/automation.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/automation/automation.locales.jsonl new file mode 100644 index 00000000..2139b187 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/automation/automation.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أتمتة فحص الأمان في سلسلة التوريد","vocabulary":"أتمتة فحص أمان مسح آلي تكامل مستمر","embed_threshold":0.77} +{"lang":"de","description":"Sicherheitsscanning-Automatisierung, GitHub Actions, Dependabot, CodeQL, Makefile-Audit-Targets","vocabulary":"GitHub Action Dependabot CodeQL Sicherheitsscanning Automatisierung CI Pipeline SBOM Scorecard Audit Workflow Sicherheitsrichtlinie","embed_threshold":0.73} +{"lang":"es","description":"Automatizacion de escaneo de seguridad — CI/CD, analisis estatico, pipelines","vocabulary":"automatizacion escaneo seguridad CI CD pipeline analisis-estatico SAST DAST integrar","embed_threshold":0.67} +{"lang":"ja","description":"セキュリティスキャンの自動化、GitHub Actions、Dependabot","vocabulary":"GitHub Actions Dependabot CodeQL スキャン 自動化 CI パイプライン","embed_threshold":0.66} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/depscan.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/depscan.locales.jsonl new file mode 100644 index 00000000..9f12a0f9 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/depscan/depscan.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"فحص الثغرات في التبعيات","vocabulary":"تبعيات ثغرات فحص مسح أمان مكتبات","embed_threshold":0.71} 
+{"lang":"de","description":"Abhängigkeits-Schwachstellenscanning, Lockfile-Auditing, Paketsicherheit","vocabulary":"osv-scanner pip-audit npm-audit cargo-audit govulncheck Abhängigkeitsscan Schwachstelle CVE Lockfile Requirements Paketsicherheit SBOM","embed_threshold":0.67} +{"lang":"es","description":"Escaneo de vulnerabilidades en dependencias — auditar, actualizar, parchar","vocabulary":"dependencias vulnerabilidad escaneo auditar CVE parche actualizar SCA composicion","embed_threshold":0.72} +{"lang":"ja","description":"依存関係の脆弱性スキャン、ロックファイル監査","vocabulary":"脆弱性スキャン 依存関係 監査 CVE osv-scanner pip-audit npm-audit","embed_threshold":0.71} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/go/go.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/go/go.locales.jsonl new file mode 100644 index 00000000..887c8704 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/depscan/go/go.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أمان تبعيات لغة Go","vocabulary":"Go تبعيات أمان وحدات فحص ثغرات","embed_threshold":0.69} +{"lang":"de","description":"Go-Abhängigkeitssicherheit, govulncheck, Modulverifizierung, Replace-Direktiven","vocabulary":"govulncheck go.sum go.mod Replace-Direktive go-install go-get Modul Proxy Prüfsumme","embed_threshold":0.46} +{"lang":"es","description":"Seguridad de dependencias en Go — modulos, govulncheck, actualizaciones","vocabulary":"Go modulos govulncheck dependencias seguridad go.mod go.sum vulnerabilidad actualizar","embed_threshold":0.67} +{"lang":"ja","description":"Goの依存関係セキュリティ、govulncheck","vocabulary":"govulncheck go.sum go.mod モジュール プロキシ チェックサム","embed_threshold":0.5} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/node/lockfile/lockfile.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/node/lockfile/lockfile.locales.jsonl new file mode 100644 index 00000000..49d8fba5 --- /dev/null +++ 
b/hooks/ways/softwaredev/code/supplychain/depscan/node/lockfile/lockfile.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"نظافة ملفات القفل وتثبيت الإصدارات","vocabulary":"ملف قفل إصدارات تثبيت نظافة تبعيات npm","embed_threshold":0.65} +{"lang":"de","description":"Lockfile-Hygiene, Versionspinning, Supply-Chain-Angriffe durch Abhängigkeitsauflösung verhindern","vocabulary":"Lockfile package-lock.json yarn.lock pnpm-lock pinnen exakte Version Caret Semver npm-ci transitiv Auflösung","embed_threshold":0.62} +{"lang":"es","description":"Higiene de archivos de bloqueo — package-lock.json, integridad, consistencia","vocabulary":"lockfile bloqueo package-lock integridad consistencia npm yarn pnpm reproducible deterministico","embed_threshold":0.61} +{"lang":"ja","description":"ロックファイルの衛生管理、バージョン固定、サプライチェーン攻撃対策","vocabulary":"ロックファイル package-lock.json yarn.lock バージョン固定 semver","embed_threshold":0.53} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/node/node.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/node/node.locales.jsonl new file mode 100644 index 00000000..a4dc079a --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/depscan/node/node.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أمان تبعيات Node.js","vocabulary":"Node.js تبعيات أمان npm حزم فحص ثغرات","embed_threshold":0.6} +{"lang":"de","description":"Node.js-Abhängigkeitssicherheit, npm-audit, Postinstall-Skripte, Typosquatting","vocabulary":"npm-audit package-lock.json node_modules postinstall preinstall yarn pnpm npx Typosquatting JavaScript TypeScript","embed_threshold":0.62} +{"lang":"es","description":"Seguridad de dependencias en Node.js — npm audit, paquetes maliciosos","vocabulary":"Node npm audit dependencias seguridad paquete malicioso vulnerabilidad node_modules actualizar","embed_threshold":0.71} +{"lang":"ja","description":"Node.jsの依存関係セキュリティ、npm audit","vocabulary":"npm audit package-lock.json postinstall 
タイポスクワッティング","embed_threshold":0.5} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/python/python.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/python/python.locales.jsonl new file mode 100644 index 00000000..4be362fa --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/depscan/python/python.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أمان تبعيات Python","vocabulary":"Python تبعيات أمان pip حزم فحص ثغرات","embed_threshold":0.61} +{"lang":"de","description":"Python-Abhängigkeitssicherheit, pip-audit, setup.py-Risiken, PyPI-Typosquatting","vocabulary":"pip-audit setup.py pyproject.toml requirements.txt Wheel sdist PyPI Typosquatting Safety pip-install Python Paket","embed_threshold":0.56} +{"lang":"es","description":"Seguridad de dependencias en Python — pip audit, safety, entornos virtuales","vocabulary":"Python pip audit safety dependencias seguridad requirements virtualenv vulnerabilidad PyPI","embed_threshold":0.6} +{"lang":"ja","description":"Pythonの依存関係セキュリティ、pip-audit","vocabulary":"pip-audit setup.py pyproject.toml requirements.txt タイポスクワッティング","embed_threshold":0.54} diff --git a/hooks/ways/softwaredev/code/supplychain/depscan/rust/rust.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/depscan/rust/rust.locales.jsonl new file mode 100644 index 00000000..d50134ef --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/depscan/rust/rust.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أمان تبعيات Rust","vocabulary":"Rust تبعيات أمان cargo صناديق فحص ثغرات","embed_threshold":0.57} +{"lang":"de","description":"Rust-Abhängigkeitssicherheit, cargo-audit, Unsafe-Blöcke, Build-Skript-Risiken","vocabulary":"cargo-audit Cargo.lock Cargo.toml unsafe build.rs Crate crates.io RustSec Sicherheitshinweis","embed_threshold":0.62} +{"lang":"es","description":"Seguridad de dependencias en Rust — cargo audit, crates, cadena de confianza","vocabulary":"Rust cargo audit crates dependencias seguridad 
Cargo.lock vulnerabilidad advisory confianza","embed_threshold":0.68} +{"lang":"ja","description":"Rustの依存関係セキュリティ、cargo audit","vocabulary":"cargo audit Cargo.lock unsafe build.rs クレート rustsec","embed_threshold":0.59} diff --git a/hooks/ways/softwaredev/code/supplychain/historysever/historysever.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/historysever/historysever.locales.jsonl new file mode 100644 index 00000000..dcb14398 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/historysever/historysever.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تسطيح تاريخ Git وقطع السجلات","vocabulary":"تاريخ Git تسطيح قطع سجلات تنظيف","embed_threshold":0.79} +{"lang":"de","description":"Git-Historienbereinigung, Fork-Trennung, BFG-Cleanup, Geheimnisse aus Git-Historie entfernen","vocabulary":"Orphan-Branch Historie abflachen Fork trennen BFG git-reflog gc prune eigenständig Fork löschen Historienbereinigung","embed_threshold":0.7} +{"lang":"es","description":"Aplanamiento de historial git — squash, rebase, limpieza de ramas","vocabulary":"historial git aplanar squash rebase limpiar rama commits condensar simplificar","embed_threshold":0.48} +{"lang":"ja","description":"git履歴のフラット化、フォーク切断、BFGクリーンアップ","vocabulary":"孤立ブランチ 履歴切断 フラット化 bfg reflog gc prune","embed_threshold":0.68} diff --git a/hooks/ways/softwaredev/code/supplychain/repoaudit/repoaudit.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/repoaudit/repoaudit.locales.jsonl new file mode 100644 index 00000000..df4227d6 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/repoaudit/repoaudit.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تدقيق تاريخ Git والكشف عن الأسرار المسربة","vocabulary":"تدقيق مستودع تاريخ أسرار مسربة فحص كشف","embed_threshold":0.76} +{"lang":"de","description":"Git-Historien-Audit, Erkennung geleakter Geheimnisse, Binärdateien in Repositories aufspüren","vocabulary":"Git-Historie große Objekte geleakte Geheimnisse committed gitignored 
Binärdatei Blob git-rev-list Repogröße Geheimnisscan API-Schlüssel Token Passwort","embed_threshold":0.73} +{"lang":"es","description":"Auditoria de historial git — secretos filtrados, archivos sensibles, limpieza","vocabulary":"auditoria git historial secretos filtrados sensible limpiar BFG trufflehog gitleaks","embed_threshold":0.79} +{"lang":"ja","description":"git履歴の監査、漏洩シークレットの検出、バイナリブロブの調査","vocabulary":"git履歴 シークレット漏洩 バイナリブロブ 認証情報 スキャン 監査","embed_threshold":0.69} diff --git a/hooks/ways/softwaredev/code/supplychain/sourceaudit/sourceaudit.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/sourceaudit/sourceaudit.locales.jsonl new file mode 100644 index 00000000..a98152f2 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/sourceaudit/sourceaudit.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تدقيق أمان الكود المصدري","vocabulary":"تدقيق كود مصدري أمان مراجعة فحص ثغرات","embed_threshold":0.84} +{"lang":"de","description":"Quellcode-Sicherheitsaudit auf gefährliche Muster, Verschleierung, Datenexfiltration","vocabulary":"eval exec verschleiert base64 pickle Deserialisierung Exfiltration Shell-Injection Subprocess os.system innerHTML gefährliches Muster Codeaudit Quellcodeprüfung","embed_threshold":0.61} +{"lang":"es","description":"Auditoria de seguridad de codigo fuente — revision manual, patrones peligrosos","vocabulary":"auditoria codigo-fuente revision seguridad patron peligroso manual inspeccion hallazgos riesgo","embed_threshold":0.77} +{"lang":"ja","description":"ソースコードのセキュリティ監査、危険なパターンの検出","vocabulary":"eval exec 難読化 base64 情報流出 シェル注入 危険パターン","embed_threshold":0.76} diff --git a/hooks/ways/softwaredev/code/supplychain/supplychain.locales.jsonl b/hooks/ways/softwaredev/code/supplychain/supplychain.locales.jsonl new file mode 100644 index 00000000..d6afa7f3 --- /dev/null +++ b/hooks/ways/softwaredev/code/supplychain/supplychain.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"أمان سلسلة توريد 
البرمجيات","vocabulary":"سلسلة توريد أمان برمجيات تبعيات حماية","embed_threshold":0.84} +{"lang":"de","description":"Lieferkettensicherheit, Repository-Vertrauensbewertung, Code aus unbekannten Quellen evaluieren","vocabulary":"Lieferkette Vertrauensbewertung geforktes Repo unbekanntes Repo Audit Abhängigkeitsscan Schwachstelle Backdoor Herkunft Klon Fork extern Drittanbieter","embed_threshold":0.69} +{"lang":"es","description":"Seguridad de la cadena de suministro — dependencias, procedencia, integridad","vocabulary":"cadena-de-suministro seguridad dependencias procedencia integridad SBOM firma verificar confianza","embed_threshold":0.68} +{"lang":"ja","description":"サプライチェーンセキュリティ、リポジトリの信頼性評価","vocabulary":"サプライチェーン 信頼性 評価 監査 依存関係 脆弱性 悪意あるコード","embed_threshold":0.75} diff --git a/hooks/ways/softwaredev/code/testing/mocking/mocking.locales.jsonl b/hooks/ways/softwaredev/code/testing/mocking/mocking.locales.jsonl new file mode 100644 index 00000000..f8284428 --- /dev/null +++ b/hooks/ways/softwaredev/code/testing/mocking/mocking.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"بدائل الاختبار والمحاكاة","vocabulary":"محاكاة بدائل اختبار وهمي تقليد حقن","embed_threshold":0.74} +{"lang":"de","description":"Abhängigkeiten mocken, Testdoubles, Fakes, Stubs, Spies, Dependency Injection für Tests","vocabulary":"Mock Fake Stub Spy Double Abhängigkeit injizieren extern isolieren Testdouble","embed_threshold":0.57} +{"lang":"es","description":"Dobles de prueba — mocks, stubs, espias, inyeccion de dependencias","vocabulary":"mock stub espia doble prueba simular inyeccion dependencia falso aislamiento","embed_threshold":0.76} +{"lang":"ja","description":"依存関係のモック化、テストダブル、フェイク、スパイ","vocabulary":"モック フェイク スタブ スパイ テストダブル 依存注入 分離","embed_threshold":0.59} diff --git a/hooks/ways/softwaredev/code/testing/tdd/tdd.locales.jsonl b/hooks/ways/softwaredev/code/testing/tdd/tdd.locales.jsonl new file mode 100644 index 00000000..88e29497 --- /dev/null +++ 
b/hooks/ways/softwaredev/code/testing/tdd/tdd.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"التطوير الموجه بالاختبارات","vocabulary":"تطوير موجه اختبارات أحمر أخضر إعادة هيكلة دورة","embed_threshold":0.6} +{"lang":"de","description":"Testgetriebene Entwicklung, TDD Red-Green-Refactor-Zyklus, zuerst den fehlschlagenden Test schreiben","vocabulary":"TDD Red Green Refactor Test zuerst Implementierung fehlschlagend testgetrieben","embed_threshold":0.56} +{"lang":"es","description":"Desarrollo guiado por pruebas — ciclo rojo-verde-refactorizar, diseno emergente","vocabulary":"TDD desarrollo-guiado-por-pruebas rojo verde refactorizar ciclo primero-prueba diseno emergente","embed_threshold":0.57} +{"lang":"ja","description":"テスト駆動開発、レッド・グリーン・リファクタのサイクル","vocabulary":"TDD テスト駆動 レッド グリーン リファクタ テストファースト 失敗テスト","embed_threshold":0.63} diff --git a/hooks/ways/softwaredev/code/testing/testing.locales.jsonl b/hooks/ways/softwaredev/code/testing/testing.locales.jsonl new file mode 100644 index 00000000..d45cdac3 --- /dev/null +++ b/hooks/ways/softwaredev/code/testing/testing.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تغطية الاختبارات والتأكيدات والتجهيزات","vocabulary":"اختبار تغطية تأكيدات تجهيزات فحص وحدة تكامل","embed_threshold":0.76} +{"lang":"de","description":"Testabdeckung, Teststruktur, Assertions, Fixtures, was und wie getestet werden soll","vocabulary":"Test Abdeckung Assertion Framework Spec Fixture describe expect verifizieren Unit Integration Teststruktur","embed_threshold":0.61} +{"lang":"es","description":"Pruebas de software — cobertura, aserciones, fixtures, estrategias de prueba","vocabulary":"pruebas cobertura asercion fixture unitaria integracion e2e estrategia suite ejecutar","embed_threshold":0.62} +{"lang":"ja","description":"テストカバレッジ、構造、アサーション、フィクスチャ","vocabulary":"テスト カバレッジ アサーション フレームワーク フィクスチャ 検証 単体テスト 結合テスト","embed_threshold":0.76} diff --git a/hooks/ways/softwaredev/delivery/branching/branching.locales.jsonl 
b/hooks/ways/softwaredev/delivery/branching/branching.locales.jsonl new file mode 100644 index 00000000..fecdaeb9 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/branching/branching.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إرشادات التفريع في Git","vocabulary":"تفريع Git فرع دمج استراتيجية إصدار","embed_threshold":0.82} +{"lang":"de","description":"Git-Branch-Strategien — Worktrees, Feature-Branches und Trunk-basierte Entwicklung","vocabulary":"Branch Zweig auschecken Worktree Hauptzweig Trunk Feature-Branch Bugfix-Branch Refaktorisierungsbranch Verzweigung Zusammenführung","embed_threshold":0.59} +{"lang":"es","description":"Estrategias de ramificacion en Git — ramas de funcionalidad, flujo de trabajo","vocabulary":"rama branch git estrategia funcionalidad feature trunk flujo-de-trabajo fusionar merge","embed_threshold":0.69} +{"lang":"ja","description":"Gitブランチの管理とブランチ戦略のガイダンス","vocabulary":"ブランチ チェックアウト ワークツリー メイン トランク フィーチャー 修正","embed_threshold":0.7} diff --git a/hooks/ways/softwaredev/delivery/branching/branching.md b/hooks/ways/softwaredev/delivery/branching/branching.md new file mode 100644 index 00000000..e21ffada --- /dev/null +++ b/hooks/ways/softwaredev/delivery/branching/branching.md @@ -0,0 +1,44 @@ +--- +description: Git branch awareness and branching guidance when editing files +vocabulary: branch checkout worktree main trunk feature fix refactor +files: \.(md|rs|sh|py|js|ts|json|yaml|yml|toml|go|rb|java|c|cpp|h|hpp|css|html|sql)$ +macro: prepend +scope: agent, subagent +--- + +# Branching Context + +## Where You Are Matters + +The macro above shows the current git branch and state. Glance at it before writing — if you're on `main` and about to make a non-trivial change, consider branching first. + +## When to Branch + +This is guidance, not a gate. Use judgment: + +| Situation | Branch? 
| +|-----------|---------| +| Exploration, temp edits, quick config tweaks | Main is fine | +| Bug fix that'll become a PR | Yes — `fix/description` | +| New feature or capability | Yes — `feat/description` | +| Refactoring across multiple files | Yes — `refactor/description` | +| Documentation updates | Judgment call — `docs/description` if substantial | + +## Why It Matters + +- Branches make work reversible without `git stash` gymnastics +- A branch is a PR draft — you can push it and walk away +- Committing to main means force-push is the only undo for public repos +- Branches let you context-switch cleanly between tasks + +## Branch Naming + +Use prefixes that match conventional commit types: + +- `fix/` — bug fixes +- `feat/` — new features +- `docs/` — documentation +- `refactor/` — restructuring without behavior change +- `adr-NNN-topic` — ADR implementation work + +Keep names short, lowercase, hyphen-separated. The branch name often becomes the PR title. diff --git a/hooks/ways/softwaredev/delivery/branching/macro.sh b/hooks/ways/softwaredev/delivery/branching/macro.sh new file mode 100755 index 00000000..61e0b550 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/branching/macro.sh @@ -0,0 +1,49 @@ +#!/bin/bash +# Dynamic git branch context for branching way +# Output: terse one-line summary of current git state +# MUST complete in under 1 second — never block on failure + +# Not a git repo? Say so and exit. 
+git rev-parse --is-inside-work-tree &>/dev/null || { + echo "Git: not a repository" + exit 0 +} + +# Current branch (or detached HEAD) +BRANCH=$(git branch --show-current 2>/dev/null) +if [[ -z "$BRANCH" ]]; then + BRANCH="detached:$(git rev-parse --short HEAD 2>/dev/null)" +fi + +# Clean or dirty +if [[ -z "$(git status --porcelain 2>/dev/null)" ]]; then + STATE="clean" +else + STATE="dirty" +fi + +# Ahead/behind upstream (may not have upstream set) +AHEAD_BEHIND="" +if COUNTS=$(git rev-list --left-right --count HEAD...@{upstream} 2>/dev/null); then + AHEAD=$(echo "$COUNTS" | cut -f1) + BEHIND=$(echo "$COUNTS" | cut -f2) + if [[ "$AHEAD" -gt 0 ]] || [[ "$BEHIND" -gt 0 ]]; then + AHEAD_BEHIND=", ${AHEAD} ahead/${BEHIND} behind" + fi +fi + +# Remote repo: extract owner/repo from origin URL +REPO="" +if ORIGIN=$(git remote get-url origin 2>/dev/null); then + # Handle both SSH and HTTPS URLs + REPO=$(echo "$ORIGIN" | sed -E 's#^(https?://[^/]+/|git@[^:]+:)##; s#\.git$##') +fi + +# Assemble output +if [[ -n "$REPO" ]]; then + echo "Git: ${BRANCH} (${STATE}${AHEAD_BEHIND}) repo: ${REPO}" +else + echo "Git: ${BRANCH} (${STATE}${AHEAD_BEHIND})" +fi + +exit 0 diff --git a/hooks/ways/softwaredev/delivery/commits/commits.locales.jsonl b/hooks/ways/softwaredev/delivery/commits/commits.locales.jsonl new file mode 100644 index 00000000..d073b358 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/commits/commits.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"رسائل الإيداع والإيداعات التقليدية","vocabulary":"إيداع رسالة تقليدي Git تاريخ وصف تغيير","embed_threshold":0.69} +{"lang":"de","description":"Git-Commit-Nachrichten und Conventional Commits — atomare, aussagekräftige Änderungshistorie","vocabulary":"Commit Nachricht Branch konventionell feat fix Geltungsbereich atomar zusammenfassen squash Änderungshistorie Commitnachricht","embed_threshold":0.62} +{"lang":"es","description":"Mensajes de commit — commits convencionales, historial limpio, 
atomicidad","vocabulary":"commit mensaje convencional historial atomico descriptivo prefijo feat fix chore cambio","embed_threshold":0.56} +{"lang":"ja","description":"gitコミットメッセージ、コンベンショナルコミット","vocabulary":"コミット メッセージ ブランチ コンベンショナル feat fix スコープ","embed_threshold":0.72} diff --git a/hooks/ways/softwaredev/delivery/delivery.locales.jsonl b/hooks/ways/softwaredev/delivery/delivery.locales.jsonl new file mode 100644 index 00000000..04464259 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/delivery.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"شحن الكود إلى بيئة الإنتاج","vocabulary":"تسليم شحن إنتاج نشر إطلاق توزيع","embed_threshold":0.48} +{"lang":"de","description":"Code ausliefern — Commits, Pull Requests, Releases und Deployment in Produktion","vocabulary":"ausliefern deployen veröffentlichen Release Commit pushen mergen Pull-Request Pipeline Produktion Bereitstellung Auslieferung","embed_threshold":0.58} +{"lang":"es","description":"Entrega de software — enviar codigo a produccion, despliegue, pipeline","vocabulary":"entrega despliegue produccion pipeline enviar lanzar CI CD automatizar flujo","embed_threshold":0.67} +{"lang":"ja","description":"コードの出荷 — コミット、PR、リリース、本番環境へのデプロイ","vocabulary":"出荷 デリバリー デプロイ リリース コミット プッシュ マージ パイプライン 本番","embed_threshold":0.62} diff --git a/hooks/ways/softwaredev/delivery/github/github.locales.jsonl b/hooks/ways/softwaredev/delivery/github/github.locales.jsonl new file mode 100644 index 00000000..25dba6d8 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/github/github.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"طلبات السحب والمشكلات والتكامل المستمر على GitHub","vocabulary":"GitHub طلب سحب مشكلة تكامل مستمر مراجعة","embed_threshold":0.72} +{"lang":"de","description":"GitHub-Workflows — Pull Requests, Issues, Code-Review und CI-Pipelines","vocabulary":"Pull-Request Issue Review Prüfung CI Label Fork Repository mergen zusammenführen GitHub Codeüberprüfung","embed_threshold":0.67} 
+{"lang":"es","description":"Flujo de trabajo en GitHub — pull requests, issues, CI, Actions","vocabulary":"GitHub PR pull-request issue CI Actions workflow revision colaboracion repositorio","embed_threshold":0.61} +{"lang":"ja","description":"GitHubのプルリクエスト、イシュー、コードレビュー、CI","vocabulary":"PR プルリクエスト イシュー レビュー チェック CI ラベル リポジトリ マージ","embed_threshold":0.66} diff --git a/hooks/ways/softwaredev/delivery/implement/implement.locales.jsonl b/hooks/ways/softwaredev/delivery/implement/implement.locales.jsonl new file mode 100644 index 00000000..d4d484f5 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/implement/implement.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تخطيط التنفيذ بعد القرار المعماري","vocabulary":"تنفيذ تخطيط بعد قرار معماري خطوات مهام","embed_threshold":0.9} +{"lang":"de","description":"Umsetzung nach ADR-Entscheidung — Arbeitspakete schneiden und parallelisieren","vocabulary":"implementieren umsetzen bauen beginnen starten ausführen Plan Arbeitspaket aufteilen parallelisieren Umsetzung Durchführung","embed_threshold":0.71} +{"lang":"es","description":"Planificacion de implementacion post-ADR — desglose de tareas, secuencia, riesgos","vocabulary":"implementar planificar tarea desglose secuencia riesgo alcance fase iteracion post-ADR","embed_threshold":0.7} +{"lang":"ja","description":"ADR後の実装 — 作業分解、並列化の計画","vocabulary":"実装 構築 開始 実行 計画 作業分解 並列化","embed_threshold":0.74} diff --git a/hooks/ways/softwaredev/delivery/migrations/migrations.locales.jsonl b/hooks/ways/softwaredev/delivery/migrations/migrations.locales.jsonl new file mode 100644 index 00000000..981f950f --- /dev/null +++ b/hooks/ways/softwaredev/delivery/migrations/migrations.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"ترحيل قواعد البيانات","vocabulary":"ترحيل قاعدة بيانات مخطط تحديث هجرة تغيير","embed_threshold":0.54} +{"lang":"de","description":"Datenbankmigrationen — Schemaänderungen, Rollbacks und Seed-Daten","vocabulary":"Migration Schema Tabelle Spalte Index 
Rollback Seed DDL Schemaänderung Datenbankmigration Rückführung","embed_threshold":0.57} +{"lang":"es","description":"Migraciones de base de datos — esquema, versionado, rollback, datos","vocabulary":"migracion base-de-datos esquema versionado rollback ALTER tabla columna datos reversible","embed_threshold":0.58} +{"lang":"ja","description":"データベースマイグレーション、スキーマ変更、ロールバック","vocabulary":"マイグレーション スキーマ テーブル変更 カラム インデックス ロールバック DDL","embed_threshold":0.71} diff --git a/hooks/ways/softwaredev/delivery/patches/patches.locales.jsonl b/hooks/ways/softwaredev/delivery/patches/patches.locales.jsonl new file mode 100644 index 00000000..31a9bfb3 --- /dev/null +++ b/hooks/ways/softwaredev/delivery/patches/patches.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"ملفات التصحيح وفروقات Git","vocabulary":"تصحيح رقعة فرق Git ملف تعديل","embed_threshold":0.82} +{"lang":"de","description":"Patch-Dateien erstellen und anwenden — git diff, Hunks und Patchserien","vocabulary":"Patch Diff anwenden Hunk unified Patchserie format-patch Änderungsdatei Flicken","embed_threshold":0.55} +{"lang":"es","description":"Archivos de parche — git diff, aplicar parches, formato unificado","vocabulary":"parche diff patch aplicar formato unificado git hunks cambio archivo","embed_threshold":0.68} +{"lang":"ja","description":"パッチファイルの作成と適用、git diff","vocabulary":"パッチ diff 適用 ハンク 統一形式 format-patch","embed_threshold":0.53} diff --git a/hooks/ways/softwaredev/delivery/release/release.locales.jsonl b/hooks/ways/softwaredev/delivery/release/release.locales.jsonl new file mode 100644 index 00000000..246db1cf --- /dev/null +++ b/hooks/ways/softwaredev/delivery/release/release.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الإصدارات وسجل التغييرات والإصدار الدلالي","vocabulary":"إصدار سجل تغييرات دلالي نشر ملاحظات","embed_threshold":0.73} +{"lang":"de","description":"Software-Releases — Changelog, Versionierung und semantische Versionsnummern","vocabulary":"Release Changelog Version 
Versionsbump Semver Tag veröffentlichen ausliefern Freigabe Versionserhöhung","embed_threshold":0.76} +{"lang":"es","description":"Lanzamientos — changelog, versionado semantico, etiquetas, notas de version","vocabulary":"lanzamiento release changelog semver versionado etiqueta tag notas version publicar","embed_threshold":0.66} +{"lang":"ja","description":"ソフトウェアリリース、変更履歴、バージョン管理、semver","vocabulary":"リリース 変更履歴 バージョン semver タグ 公開 出荷","embed_threshold":0.76} diff --git a/hooks/ways/softwaredev/docs/api/api.locales.jsonl b/hooks/ways/softwaredev/docs/api/api.locales.jsonl new file mode 100644 index 00000000..770b9295 --- /dev/null +++ b/hooks/ways/softwaredev/docs/api/api.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تصميم واجهات برمجة التطبيقات REST","vocabulary":"واجهة برمجة تطبيقات REST API تصميم نقاط وصول","embed_threshold":0.62} +{"lang":"de","description":"API-Entwurf — REST-Endpunkte, HTTP-Schnittstellen, Versionierung und Paginierung","vocabulary":"Endpunkt API REST Route HTTP Paginierung Versionierung GraphQL Webhook Schnittstelle API-Design","embed_threshold":0.5} +{"lang":"es","description":"Diseno de API REST — endpoints, verbos HTTP, codigos de estado, OpenAPI","vocabulary":"API REST endpoint HTTP GET POST PUT DELETE estado OpenAPI diseno recurso","embed_threshold":0.51} +{"lang":"ja","description":"REST API設計、HTTPエンドポイント、バージョニング","vocabulary":"エンドポイント API REST ルート HTTP ページネーション バージョニング GraphQL Webhook","embed_threshold":0.46} diff --git a/hooks/ways/softwaredev/docs/docs.locales.jsonl b/hooks/ways/softwaredev/docs/docs.locales.jsonl new file mode 100644 index 00000000..d31e0b49 --- /dev/null +++ b/hooks/ways/softwaredev/docs/docs.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"فلسفة التوثيق ومبادئه","vocabulary":"توثيق وثائق فلسفة كتابة تقنية شرح","embed_threshold":0.7} +{"lang":"de","description":"Dokumentationsphilosophie — Markdown-Konventionen und technische Prosa","vocabulary":"Dokumentation Markdown technisch Prosa 
Projektdokumentation Anleitung Beschreibung Handbuch","embed_threshold":0.67} +{"lang":"es","description":"Filosofia de documentacion — escribir para la audiencia, mantener actualizado","vocabulary":"documentacion escribir guia manual audiencia mantener actualizar claro conciso","embed_threshold":0.76} +{"lang":"ja","description":"ドキュメントの考え方、Markdownの規約","vocabulary":"ドキュメント マークダウン 技術文書 プロジェクト文書 文書化","embed_threshold":0.75} diff --git a/hooks/ways/softwaredev/docs/docstrings/docstrings.locales.jsonl b/hooks/ways/softwaredev/docs/docstrings/docstrings.locales.jsonl new file mode 100644 index 00000000..41df83cc --- /dev/null +++ b/hooks/ways/softwaredev/docs/docstrings/docstrings.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"توثيق الكود والتعليقات التوضيحية","vocabulary":"توثيق كود تعليقات توضيحية وصف دوال واجهة","embed_threshold":0.71} +{"lang":"de","description":"Codedokumentation — Docstrings, JSDoc, Inline-Kommentare und Annotationen","vocabulary":"Docstring JSDoc GoDoc PyDoc RustDoc Kommentar Annotation Codedokumentation Inline-Dokumentation Funktionsbeschreibung","embed_threshold":0.69} +{"lang":"es","description":"Documentacion de codigo — docstrings, comentarios, anotaciones de tipo","vocabulary":"docstring comentario documentar funcion parametro retorno tipo anotacion JSDoc pydoc","embed_threshold":0.63} +{"lang":"ja","description":"コードドキュメント、docstring、JSDoc、インラインコメント","vocabulary":"docstring JSDoc コメント アノテーション コード文書化 説明","embed_threshold":0.6} diff --git a/hooks/ways/softwaredev/docs/mermaid/mermaid.locales.jsonl b/hooks/ways/softwaredev/docs/mermaid/mermaid.locales.jsonl new file mode 100644 index 00000000..4cb38ec2 --- /dev/null +++ b/hooks/ways/softwaredev/docs/mermaid/mermaid.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"رسومات Mermaid البيانية","vocabulary":"Mermaid رسم بياني مخطط تدفق تسلسل","embed_threshold":0.9} +{"lang":"de","description":"Mermaid-Diagramme — Flussdiagramme, Sequenzdiagramme und 
Zustandsautomaten","vocabulary":"Mermaid Diagramm Flussdiagramm Sequenzdiagramm Zustandsautomat Gantt Zeitstrahl Schaubild Ablaufdiagramm","embed_threshold":0.9} +{"lang":"es","description":"Diagramas Mermaid — flujo, secuencia, clases, entidad-relacion","vocabulary":"Mermaid diagrama flujo secuencia clase entidad relacion grafico sintaxis renderizar","embed_threshold":0.87} +{"lang":"ja","description":"Mermaid図、フローチャート、シーケンス図の作成","vocabulary":"Mermaid 図 フローチャート シーケンス図 状態遷移 ガントチャート タイムライン","embed_threshold":0.9} diff --git a/hooks/ways/softwaredev/docs/readme/readme.locales.jsonl b/hooks/ways/softwaredev/docs/readme/readme.locales.jsonl new file mode 100644 index 00000000..91bbbfd1 --- /dev/null +++ b/hooks/ways/softwaredev/docs/readme/readme.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تأليف ملفات README","vocabulary":"README ملف تعريف مشروع وصف بداية","embed_threshold":0.58} +{"lang":"de","description":"README schreiben — Projektübersicht, Schnellstart und Einstiegshilfe","vocabulary":"README Projektübersicht Schnellstart Einstieg Einführung Onboarding Erste-Schritte Überblick","embed_threshold":0.55} +{"lang":"es","description":"Redaccion de README — estructura, insignias, inicio rapido, contribuir","vocabulary":"README redactar estructura insignia badge inicio-rapido instalacion uso contribuir ejemplo","embed_threshold":0.57} +{"lang":"ja","description":"READMEの作成、プロジェクト概要、はじめかた","vocabulary":"README プロジェクト概要 はじめかた クイックスタート オンボーディング","embed_threshold":0.56} diff --git a/hooks/ways/softwaredev/docs/standards/standards.locales.jsonl b/hooks/ways/softwaredev/docs/standards/standards.locales.jsonl new file mode 100644 index 00000000..e5f09964 --- /dev/null +++ b/hooks/ways/softwaredev/docs/standards/standards.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"معايير الفريق واتفاقياته","vocabulary":"معايير اتفاقيات فريق قواعد نمط توحيد","embed_threshold":0.65} +{"lang":"de","description":"Teamstandards — Coding-Konventionen, Testphilosophie und 
Stilrichtlinien","vocabulary":"Konvention Norm Richtlinie Barrierefreiheit Stilrichtlinie Linting Codestandard Programmierrichtlinie Teamvereinbarung","embed_threshold":0.61} +{"lang":"es","description":"Normas y convenciones del equipo — estilo, nomenclatura, acuerdos","vocabulary":"normas convenciones estilo nomenclatura acuerdo equipo estandar guia-de-estilo consistencia","embed_threshold":0.59} +{"lang":"ja","description":"チームの規約、コーディング規約、テスト方針","vocabulary":"規約 ルール ガイドライン アクセシビリティ スタイルガイド リンティング","embed_threshold":0.63} diff --git a/hooks/ways/softwaredev/environment/config/config.locales.jsonl b/hooks/ways/softwaredev/environment/config/config.locales.jsonl new file mode 100644 index 00000000..cfdc381f --- /dev/null +++ b/hooks/ways/softwaredev/environment/config/config.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"الإعدادات ومتغيرات البيئة","vocabulary":"إعدادات تكوين متغيرات بيئة ضبط تهيئة","embed_threshold":0.77} +{"lang":"de","description":"Konfiguration und Umgebungsvariablen — dotenv-Dateien und Verbindungseinstellungen","vocabulary":"dotenv Umgebungsvariable Konfiguration envvar Config Verbindung Port Einstellung Konfigurationsdatei","embed_threshold":0.53} +{"lang":"es","description":"Configuracion — variables de entorno, archivos de configuracion, dotenv","vocabulary":"configuracion variable-entorno dotenv env archivo config ajustes parametro perfil","embed_threshold":0.62} +{"lang":"ja","description":"設定管理、環境変数、dotenvファイル","vocabulary":"dotenv 環境変数 設定 コンフィグ 接続 ポート 構成","embed_threshold":0.54} diff --git a/hooks/ways/softwaredev/environment/debugging/debugging.locales.jsonl b/hooks/ways/softwaredev/environment/debugging/debugging.locales.jsonl new file mode 100644 index 00000000..f0f0dcc8 --- /dev/null +++ b/hooks/ways/softwaredev/environment/debugging/debugging.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"تصحيح الأخطاء واستكشاف المشكلات","vocabulary":"تصحيح أخطاء استكشاف مشكلات تتبع حل","embed_threshold":0.67} 
+{"lang":"de","description":"Debugging und Fehlersuche — Breakpoints, Stacktraces und Regressionsanalyse","vocabulary":"Debug Breakpoint Stacktrace untersuchen Fehlersuche Regression Absturz Bug Fehler Haltepunkt Ablaufverfolgung","embed_threshold":0.6} +{"lang":"es","description":"Depuracion y resolucion de problemas — trazas, puntos de interrupcion, diagnostico","vocabulary":"depurar depuracion traza breakpoint diagnostico error problema resolver log inspeccionar","embed_threshold":0.63} +{"lang":"ja","description":"デバッグ、障害の調査と問題解決","vocabulary":"デバッグ ブレークポイント スタックトレース 調査 トラブルシューティング リグレッション クラッシュ バグ","embed_threshold":0.68} diff --git a/hooks/ways/softwaredev/environment/deps/deps.locales.jsonl b/hooks/ways/softwaredev/environment/deps/deps.locales.jsonl new file mode 100644 index 00000000..e4a988ef --- /dev/null +++ b/hooks/ways/softwaredev/environment/deps/deps.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إدارة التبعيات والحزم","vocabulary":"تبعيات حزم إدارة تثبيت تحديث إصدارات","embed_threshold":0.76} +{"lang":"de","description":"Abhängigkeitsverwaltung — Pakete installieren, aktualisieren und auditieren","vocabulary":"Abhängigkeit Paket Bibliothek installieren aktualisieren Audit Schwachstelle Paketmanager Paketverwaltung","embed_threshold":0.61} +{"lang":"es","description":"Gestion de dependencias — instalar, actualizar, resolver conflictos","vocabulary":"dependencias gestionar instalar actualizar resolver conflicto version paquete gestor lock","embed_threshold":0.59} +{"lang":"ja","description":"依存関係管理、パッケージのインストール","vocabulary":"依存関係 パッケージ ライブラリ インストール アップグレード 監査 脆弱性","embed_threshold":0.67} diff --git a/hooks/ways/softwaredev/environment/environment.locales.jsonl b/hooks/ways/softwaredev/environment/environment.locales.jsonl new file mode 100644 index 00000000..921a1c25 --- /dev/null +++ b/hooks/ways/softwaredev/environment/environment.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إعداد بيئة التطوير","vocabulary":"بيئة تطوير 
إعداد تثبيت أدوات تهيئة","embed_threshold":0.77} +{"lang":"de","description":"Entwicklungsumgebung einrichten — Konfiguration, Abhängigkeiten, Debugging und SSH","vocabulary":"Umgebung Einrichtung Konfiguration Abhängigkeit installieren Build Werkzeug Debug SSH Entwicklungsumgebung","embed_threshold":0.61} +{"lang":"es","description":"Entorno de desarrollo — configuracion, herramientas, preparacion del espacio de trabajo","vocabulary":"entorno desarrollo configurar herramientas espacio-de-trabajo preparar instalar requisitos local","embed_threshold":0.59} +{"lang":"ja","description":"開発環境 — 設定、依存関係、デバッグ、SSH","vocabulary":"環境 セットアップ 設定 依存関係 インストール ビルド ツール デバッグ SSH","embed_threshold":0.67} diff --git a/hooks/ways/softwaredev/environment/makefile/makefile.locales.jsonl b/hooks/ways/softwaredev/environment/makefile/makefile.locales.jsonl new file mode 100644 index 00000000..ee46c2cc --- /dev/null +++ b/hooks/ways/softwaredev/environment/makefile/makefile.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"Makefile كمشغل للمهام","vocabulary":"Makefile مهام بناء تشغيل أوامر أتمتة make","embed_threshold":0.61} +{"lang":"de","description":"Makefile als Projekt-Taskrunner — Build, Lint, Test und Format als Make-Targets","vocabulary":"Makefile Make Target Build Lint Test Format Clean Install Taskrunner Bauziel","embed_threshold":0.55} +{"lang":"es","description":"Makefile como ejecutor de tareas — targets, recetas, automatizacion de build","vocabulary":"Makefile make target receta tarea build automatizar compilar ejecutar dependencia","embed_threshold":0.62} +{"lang":"ja","description":"Makefileをプロジェクトのタスクランナーとして活用","vocabulary":"Makefile make ターゲット ビルド リント テスト フォーマット クリーン インストール","embed_threshold":0.62} diff --git a/hooks/ways/softwaredev/environment/ssh/ssh.locales.jsonl b/hooks/ways/softwaredev/environment/ssh/ssh.locales.jsonl new file mode 100644 index 00000000..a866a8e9 --- /dev/null +++ b/hooks/ways/softwaredev/environment/ssh/ssh.locales.jsonl @@ -0,0 +1,4 @@ 
+{"lang":"ar","description":"الوصول عن بعد عبر SSH","vocabulary":"SSH وصول بعيد اتصال آمن مفاتيح نفق","embed_threshold":0.51} +{"lang":"de","description":"SSH-Fernzugriff — Schlüsselverwaltung, sichere Übertragung und Tunneling","vocabulary":"SSH Fernzugriff Schlüssel SCP Rsync Bastion Jumphost Tunnel Portweiterleitung Schlüsselverwaltung","embed_threshold":0.51} +{"lang":"es","description":"Acceso remoto SSH — claves, tuneles, configuracion, agente","vocabulary":"SSH remoto clave publica privada tunel configuracion agente acceso servidor conexion","embed_threshold":0.58} +{"lang":"ja","description":"SSHリモートアクセス、鍵管理、セキュア転送","vocabulary":"SSH リモート 鍵 SCP rsync 踏み台 トンネル ポートフォワーディング","embed_threshold":0.55} diff --git a/hooks/ways/softwaredev/visualization/charts/charts.locales.jsonl b/hooks/ways/softwaredev/visualization/charts/charts.locales.jsonl new file mode 100644 index 00000000..41a67d99 --- /dev/null +++ b/hooks/ways/softwaredev/visualization/charts/charts.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"رسومات بيانية في الطرفية","vocabulary":"رسم بياني طرفية مخطط بيانات عرض","embed_threshold":0.7} +{"lang":"de","description":"ANSI-Terminaldiagramme rendern — Balken-, Linien- und Sparkline-Grafiken","vocabulary":"Diagramm Grafik Sparkline Histogramm Plot Trend Metrik Balkendiagramm Liniendiagramm Visualisierung","embed_threshold":0.59} +{"lang":"es","description":"Graficos en terminal — barras, lineas, histogramas, visualizacion de datos","vocabulary":"grafico barra linea histograma terminal datos visualizar sparkline tabla ASCII","embed_threshold":0.75} +{"lang":"ja","description":"ANSIターミナルチャートの描画 — 棒グラフ、折れ線、スパークライン","vocabulary":"チャート グラフ スパークライン ヒストグラム プロット 傾向 指標 可視化","embed_threshold":0.58} diff --git a/hooks/ways/softwaredev/visualization/diagrams/diagrams.locales.jsonl b/hooks/ways/softwaredev/visualization/diagrams/diagrams.locales.jsonl new file mode 100644 index 00000000..bff25c1b --- /dev/null +++ 
b/hooks/ways/softwaredev/visualization/diagrams/diagrams.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"رسومات Mermaid في الطرفية","vocabulary":"رسم تخطيطي Mermaid طرفية مخطط تدفق","embed_threshold":0.9} +{"lang":"de","description":"Mermaid-Diagramme als Terminal-Grafik rendern — Fluss-, Sequenz- und ER-Diagramme","vocabulary":"Mermaid Diagramm Flussdiagramm Sequenzdiagramm Zustandsautomat ER Entitätsbeziehung Terminalkunst Schaubild","embed_threshold":0.9} +{"lang":"es","description":"Diagramas Mermaid en terminal — flujo, secuencia, arquitectura visual","vocabulary":"diagrama Mermaid terminal flujo secuencia arquitectura visual renderizar ASCII bloque","embed_threshold":0.81} +{"lang":"ja","description":"Mermaid図をターミナルアートとして描画","vocabulary":"Mermaid 図 フローチャート シーケンス図 状態遷移 ER図 エンティティ","embed_threshold":0.9} diff --git a/hooks/ways/softwaredev/visualization/visualization.locales.jsonl b/hooks/ways/softwaredev/visualization/visualization.locales.jsonl new file mode 100644 index 00000000..a3a2b473 --- /dev/null +++ b/hooks/ways/softwaredev/visualization/visualization.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"التمثيلات البصرية والمرئية","vocabulary":"تمثيل بصري مرئي رسم عرض تصور بيانات","embed_threshold":0.69} +{"lang":"de","description":"Visuelle Darstellungen vorschlagen — Systeme durch Diagramme und Abläufe erklären","vocabulary":"durchgehen erklären zeigen beschreiben Überblick Ablauf Prozess Architektur Darstellung Veranschaulichung Systemübersicht","embed_threshold":0.71} +{"lang":"es","description":"Representaciones visuales — diagramas, graficos, tablas, arte ASCII","vocabulary":"visualizacion representacion diagrama grafico tabla ASCII arte terminal formato presentar","embed_threshold":0.75} +{"lang":"ja","description":"システム説明時に適切な視覚表現を提案","vocabulary":"説明 概要 フロー プロセス アーキテクチャ 可視化 図解","embed_threshold":0.73} diff --git a/hooks/ways/writing/writing.locales.jsonl b/hooks/ways/writing/writing.locales.jsonl new file mode 100644 index 
00000000..d51c70de --- /dev/null +++ b/hooks/ways/writing/writing.locales.jsonl @@ -0,0 +1,4 @@ +{"lang":"ar","description":"إنشاء المحتوى والمستندات والمقترحات","vocabulary":"كتابة محتوى مستند مقترح تأليف نص وثيقة","embed_threshold":0.7} +{"lang":"de","description":"Texterstellung — Dokumente, Präsentationen, Berichte und Entwürfe verfassen","vocabulary":"schreiben verfassen entwerfen Vorschlag Bericht Präsentation Memo Gliederung überarbeiten Entwurf Dokument Texterstellung","embed_threshold":0.73} +{"lang":"es","description":"Creacion de contenido — documentos, propuestas, redaccion, comunicacion escrita","vocabulary":"escribir redactar documento propuesta contenido comunicacion texto borrador revisar estilo","embed_threshold":0.71} +{"lang":"ja","description":"コンテンツ作成 — 文書、プレゼン、レポート、提案書","vocabulary":"執筆 下書き 作成 提案書 レポート プレゼン メモ アウトライン 推敲","embed_threshold":0.76} diff --git a/settings.json b/settings.json index 1458aca0..86816b5f 100644 --- a/settings.json +++ b/settings.json @@ -8,8 +8,10 @@ "permissions": { "allow": [ "Edit(~/.claude/**)", + "Edit(/home/aaron/.claude/**)", "Read", "Write(~/.claude/**)", + "Write(/home/aaron/.claude/**)", "Bash(~/.claude/hooks/ways/list-triggered.sh:*)", "Bash(cat:*)", "Bash(head:*)", diff --git a/tools/way-embed/test-embedding.sh b/tools/way-embed/test-embedding.sh index 56f0a4bb..d13885de 100755 --- a/tools/way-embed/test-embedding.sh +++ b/tools/way-embed/test-embedding.sh @@ -12,7 +12,9 @@ elif [[ -x "${HOME}/.claude/bin/way-embed" ]]; then else WAY_EMBED="" fi -CORPUS="${XDG_WAY}/ways-corpus.jsonl" +# Use EN-only corpus — this test validates the English model against English ways. +# Multilingual matching is tested separately by test-multilingual.sh. 
+CORPUS="${XDG_WAY}/ways-corpus-en.jsonl" MODEL="${XDG_WAY}/minilm-l6-v2.gguf" pass=0; fail=0; skip=0 diff --git a/tools/way-embed/test-multilingual.sh b/tools/way-embed/test-multilingual.sh index bf58eb0a..ac690eab 100755 --- a/tools/way-embed/test-multilingual.sh +++ b/tools/way-embed/test-multilingual.sh @@ -193,7 +193,7 @@ if $MARKDOWN; then echo "" echo "1. **English ways + English model** — current production. High precision for English prompts." echo "2. **English ways + multilingual model (cross-language)** — user types in any language, matches against English descriptions. Works but scores 30-50% lower." - echo "3. **Native-language stubs + multilingual model (same-language)** — frontmatter-only \`.ja.md\` stubs with native descriptions. Consistently scores 0.80+ across tested languages." + echo "3. **Native-language stubs + multilingual model (same-language)** — locale entries in \`.locales.jsonl\` with native descriptions. Consistently scores 0.80+ across tested languages." echo "" echo "**Recommendation:** Ship both models. English ways use the English model (precise, 21MB). Multilingual stubs use the multilingual model (broad, 127MB). Per-way \`embed_model\` frontmatter field controls routing. This gives per-language threshold tuning without compromising English accuracy." else diff --git a/tools/ways-cli/scripts/pack-locales.sh b/tools/ways-cli/scripts/pack-locales.sh new file mode 100755 index 00000000..a1136a9e --- /dev/null +++ b/tools/ways-cli/scripts/pack-locales.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# Pack individual .lang.md locale stubs into .locales.jsonl files. 
+# Usage: pack-locales.sh [--dry-run] [ways-dir] +set -euo pipefail + +DRY_RUN=false +WAYS_DIR="${HOME}/.claude/hooks/ways" + +for arg in "$@"; do + case "$arg" in + --dry-run) DRY_RUN=true ;; + *) WAYS_DIR="$arg" ;; + esac +done + +packed=0 +deleted=0 + +# Find all locale stub files (*.lang.md where lang is 2-5 lowercase chars) +find -L "$WAYS_DIR" -name '*.*.md' -not -name '*.check.md' -type f | sort | while IFS= read -r stubfile; do + fname=$(basename "$stubfile") + stem="${fname%.md}" + + # Extract candidate locale code (last dot-separated segment) + lang="${stem##*.}" + wayname="${stem%.*}" + + # Validate: 2-5 lowercase ascii chars + if ! echo "$lang" | grep -qE '^[a-z]{2,5}$'; then + continue + fi + + dir=$(dirname "$stubfile") + jsonl_file="${dir}/${wayname}.locales.jsonl" + + # Parse YAML frontmatter + desc="" + vocab="" + in_fm=false + while IFS= read -r line; do + if [ "$line" = "---" ]; then + if $in_fm; then break; fi + in_fm=true + continue + fi + if $in_fm; then + case "$line" in + description:*) desc="${line#description: }" ;; + vocabulary:*) vocab="${line#vocabulary: }" ;; + esac + fi + done < "$stubfile" + + if [ -z "$desc" ]; then + echo "SKIP (no description): $stubfile" >&2 + continue + fi + + # Build JSON line (escape for JSON) + json_line=$(printf '{"lang":"%s","description":"%s","vocabulary":"%s"}' \ + "$lang" \ + "$(echo "$desc" | sed 's/"/\\"/g')" \ + "$(echo "$vocab" | sed 's/"/\\"/g')") + + if $DRY_RUN; then + echo "PACK: $stubfile -> $jsonl_file" + echo " $json_line" + else + echo "$json_line" >> "$jsonl_file" + rm "$stubfile" + deleted=$((deleted + 1)) + fi +done + +if ! 
$DRY_RUN; then + # Sort each .locales.jsonl by lang for determinism + find -L "$WAYS_DIR" -name '*.locales.jsonl' -type f | while IFS= read -r jf; do + sort -o "$jf" "$jf" + packed=$((packed + 1)) + done + echo "Packed locale stubs into $(find -L "$WAYS_DIR" -name '*.locales.jsonl' | wc -l) .locales.jsonl files" + echo "Deleted $(find -L "$WAYS_DIR" -name '*.*.md' -not -name '*.check.md' -not -name '*.locales.jsonl' | wc -l) remaining stub files" +fi diff --git a/tools/ways-cli/src/cmd/corpus.rs b/tools/ways-cli/src/cmd/corpus.rs index c0d44b06..810a919d 100644 --- a/tools/ways-cli/src/cmd/corpus.rs +++ b/tools/ways-cli/src/cmd/corpus.rs @@ -152,31 +152,55 @@ pub fn run(ways_dir: Option, quiet: bool, if_stale: bool) -> Result<()> fn scan_ways_dir(dir: &Path, id_prefix: &str, excluded: &[String], w: &mut impl Write) -> Result { let mut count = 0; - let mut files: Vec = Vec::new(); + let mut md_files: Vec = Vec::new(); + let mut locale_files: Vec = Vec::new(); + // Track which (directory, lang) pairs have external .lang.md overrides + let mut locale_overrides: std::collections::HashSet<(PathBuf, String)> = std::collections::HashSet::new(); + for entry in WalkDir::new(dir) .follow_links(true) .into_iter() .filter_map(|e| e.ok()) { let path = entry.path(); - if !path.is_file() || path.extension().and_then(|e| e.to_str()) != Some("md") { + if !path.is_file() { + continue; + } + + let fname = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + + // Collect .locales.jsonl files + if fname.ends_with(".locales.jsonl") { + if !crate::util::is_excluded_path(path, excluded) { + locale_files.push(path.to_path_buf()); + } + continue; + } + + if path.extension().and_then(|e| e.to_str()) != Some("md") { continue; } - if path - .file_name() - .and_then(|n| n.to_str()) - .map_or(false, |n| n.contains(".check.")) - { + if fname.contains(".check.") { continue; } if crate::util::is_excluded_path(path, excluded) { continue; } - files.push(path.to_path_buf()); + + // Detect locale 
override files ({name}.{lang}.md) + if let Some(lang) = crate::util::extract_locale_from_filename(fname) { + if let Some(parent) = path.parent() { + locale_overrides.insert((parent.to_path_buf(), lang)); + } + } + + md_files.push(path.to_path_buf()); } - files.sort(); + md_files.sort(); + locale_files.sort(); - for path in &files { + // Pass 1: process .md files (including any external locale override .lang.md files) + for path in &md_files { let fm = match frontmatter::parse(path) { Ok(fm) => fm, Err(_) => continue, @@ -209,9 +233,70 @@ fn scan_ways_dir(dir: &Path, id_prefix: &str, excluded: &[String], w: &mut impl count += 1; } + // Pass 2: process .locales.jsonl files + for path in &locale_files { + let parent = path.parent().unwrap_or(Path::new("")); + let relparent = parent.strip_prefix(dir).unwrap_or(parent); + let id = format!("{}{}", id_prefix, relparent.display()); + + // Read the parent way's threshold for inheritance + let parent_threshold = find_parent_threshold(parent); + + let entries = match frontmatter::parse_locales_jsonl(path) { + Ok(e) => e, + Err(_) => continue, + }; + + for le in entries { + // Skip if an external .lang.md override exists + if locale_overrides.contains(&(parent.to_path_buf(), le.lang.clone())) { + continue; + } + + let entry = json!({ + "id": id, + "description": le.description, + "vocabulary": le.vocabulary.unwrap_or_default(), + "threshold": parent_threshold, + "embed_threshold": le.embed_threshold.unwrap_or(0.25), + "embed_model": "multilingual", + }); + + serde_json::to_writer(&mut *w, &entry)?; + w.write_all(b"\n")?; + count += 1; + } + } + Ok(count) } +/// Find the BM25 threshold from the parent way's frontmatter. 
+fn find_parent_threshold(dir: &Path) -> f64 { + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.filter_map(|e| e.ok()) { + let path = entry.path(); + let fname = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + if path.extension().and_then(|e| e.to_str()) != Some("md") { + continue; + } + // Skip locale stubs and check files + if fname.contains('.') && fname.ends_with(".md") { + let stem = fname.strip_suffix(".md").unwrap_or(""); + if stem.contains('.') { + continue; + } + } + if let Ok(fm) = frontmatter::parse(&path) { + if !fm.description.is_empty() { + return fm.threshold.unwrap_or(2.0); + } + } + } + } + 2.0 +} + /// Shell out to way-embed generate for embedding vectors. /// Generates two corpus files: one with EN model embeddings, one with multilingual. fn auto_embed(xdg_way: &Path, corpus: &Path, log: &dyn Fn(&str)) -> Result<()> { @@ -452,8 +537,9 @@ fn is_stale(manifest: &Path, global_dir: &Path, project_dir: &str) -> bool { .filter_map(|e| e.ok()) { let path = entry.path(); - if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") { - if is_newer_than(path, manifest) { + if path.is_file() { + let ext = path.extension().and_then(|e| e.to_str()); + if (ext == Some("md") || ext == Some("jsonl")) && is_newer_than(path, manifest) { return true; } } @@ -469,8 +555,9 @@ fn is_stale(manifest: &Path, global_dir: &Path, project_dir: &str) -> bool { .filter_map(|e| e.ok()) { let path = entry.path(); - if path.is_file() && path.extension().and_then(|e| e.to_str()) == Some("md") { - if is_newer_than(path, manifest) { + if path.is_file() { + let ext = path.extension().and_then(|e| e.to_str()); + if (ext == Some("md") || ext == Some("jsonl")) && is_newer_than(path, manifest) { return true; } } diff --git a/tools/ways-cli/src/cmd/language.rs b/tools/ways-cli/src/cmd/language.rs index ac4a1ae5..2f42ae4f 100644 --- a/tools/ways-cli/src/cmd/language.rs +++ b/tools/ways-cli/src/cmd/language.rs @@ -1,7 +1,7 @@ //! 
Language coverage report — shows multilingual state of ways. //! //! Reports: resolved output language, per-way embed_model, -//! language stub files (.ja.md, .ko.md, etc.), and model availability. +//! locale stubs (.locales.jsonl + override .lang.md files), and model availability. use anyhow::Result; use serde_json::json; @@ -14,7 +14,7 @@ use crate::frontmatter; use crate::table::Table; use crate::util::home_dir; -pub fn run(filter_lang: Option<&str>, json_output: bool) -> Result<()> { +pub fn run(filter_lang: Option<&str>, audit: bool, json_output: bool) -> Result<()> { let ways_dir = home_dir().join(".claude/hooks/ways"); let xdg_way = xdg_cache_dir().join("claude-ways/user"); let excluded = crate::util::load_excluded_segments(); @@ -71,10 +71,32 @@ pub fn run(filter_lang: Option<&str>, json_output: bool) -> Result<()> { println!("Multi corpus: {multi_corpus_count} ways"); println!(); + // Language coverage summary + let all_supported = get_all_language_codes(); + let non_en: Vec<&str> = all_supported.iter() + .filter(|c| *c != "en") + .map(|s| s.as_str()) + .collect(); + if !all_locales.is_empty() { - println!("Language stubs found: {}", all_locales.iter().cloned().collect::>().join(", ")); - println!(); + println!("Covered ({}/{}): {} + en", + all_locales.len(), non_en.len(), + all_locales.iter().cloned().collect::>().join(", "), + ); + } + + let uncovered: Vec = non_en.iter() + .filter(|code| !all_locales.contains(**code)) + .map(|s| s.to_string()) + .collect(); + + if !uncovered.is_empty() { + println!("Uncovered ({}/{}): {}", + uncovered.len(), non_en.len(), + uncovered.join(", "), + ); } + println!(); // Summary counts let en_count = ways.iter().filter(|w| w.embed_model == "en").count(); @@ -82,8 +104,8 @@ pub fn run(filter_lang: Option<&str>, json_output: bool) -> Result<()> { println!("Ways: {} total ({} en, {} multilingual)", ways.len(), en_count, multi_count); println!(); - // Per-way detail - if !ways.is_empty() { + // Per-way detail (only in audit 
mode) + if audit && !ways.is_empty() { let mut t = Table::new(&["Way", "Model", "Locales"]); t.max_width(0, 45); for w in &ways { @@ -97,6 +119,8 @@ pub fn run(filter_lang: Option<&str>, json_output: bool) -> Result<()> { t.add(vec![&w.id, &w.embed_model, &locales]); } t.print(); + } else if !audit { + println!("Run `ways language --audit` for per-way detail."); } // Warnings @@ -161,20 +185,32 @@ fn scan_way_dirs( let mut embed_model = "en".to_string(); let mut locales = BTreeSet::new(); - // Read all .md files in this directory + // Read all files in this directory if let Ok(entries) = std::fs::read_dir(dir_path) { for entry in entries.filter_map(|e| e.ok()) { let path = entry.path(); - if path.extension().and_then(|e| e.to_str()) != Some("md") { - continue; - } let fname = match path.file_name().and_then(|n| n.to_str()) { Some(n) => n.to_string(), None => continue, }; - // Check for locale stubs: {name}.{lang}.md - if let Some(locale) = extract_locale(&fname) { + // Check .locales.jsonl for packed locale stubs + if fname.ends_with(".locales.jsonl") { + if let Ok(entries) = frontmatter::parse_locales_jsonl(&path) { + for le in entries { + locales.insert(le.lang.clone()); + all_locales.insert(le.lang); + } + } + continue; + } + + if path.extension().and_then(|e| e.to_str()) != Some("md") { + continue; + } + + // Check for locale override files: {name}.{lang}.md + if let Some(locale) = crate::util::extract_locale_from_filename(&fname) { locales.insert(locale.clone()); all_locales.insert(locale); continue; @@ -199,33 +235,6 @@ fn scan_way_dirs( Ok(()) } -/// Extract locale code from filename like "security.ja.md" → "ja" -/// Validates against languages.json to avoid false matches like ".check.md" -fn extract_locale(filename: &str) -> Option { - // Skip check files explicitly - if filename.contains(".check.") { - return None; - } - - let parts: Vec<&str> = filename.strip_suffix(".md")?.split('.').collect(); - if parts.len() >= 2 { - let candidate = parts[parts.len() 
- 1]; - // Validate it looks like a locale code (2-5 chars, lowercase/hyphen) - if candidate.len() >= 2 - && candidate.len() <= 5 - && candidate.chars().all(|c| c.is_ascii_lowercase() || c == '-') - { - // Verify against languages.json - let parsed: serde_json::Value = - serde_json::from_str(agents::LANGUAGES_JSON).ok()?; - if parsed.get("languages")?.as_object()?.contains_key(candidate) { - return Some(candidate.to_string()); - } - } - } - None -} - /// Best-effort reverse lookup: language name → code fn resolve_to_code(lang: &str) -> String { let lower = lang.to_lowercase(); @@ -254,6 +263,21 @@ fn line_count(path: &Path) -> usize { .unwrap_or(0) } +/// Get all language codes from languages.json, sorted. +fn get_all_language_codes() -> Vec { + let parsed: serde_json::Value = match serde_json::from_str(agents::LANGUAGES_JSON) { + Ok(v) => v, + Err(_) => return Vec::new(), + }; + let mut codes: Vec = parsed + .get("languages") + .and_then(|v| v.as_object()) + .map(|m| m.keys().cloned().collect()) + .unwrap_or_default(); + codes.sort(); + codes +} + fn xdg_cache_dir() -> PathBuf { std::env::var("XDG_CACHE_HOME") .map(PathBuf::from) diff --git a/tools/ways-cli/src/cmd/mod.rs b/tools/ways-cli/src/cmd/mod.rs index 688f121d..a46ced7c 100644 --- a/tools/ways-cli/src/cmd/mod.rs +++ b/tools/ways-cli/src/cmd/mod.rs @@ -20,3 +20,4 @@ pub mod stats; pub mod status; pub mod suggest; pub mod tree; +pub mod tune; diff --git a/tools/ways-cli/src/cmd/tune.rs b/tools/ways-cli/src/cmd/tune.rs new file mode 100644 index 00000000..90460ad1 --- /dev/null +++ b/tools/ways-cli/src/cmd/tune.rs @@ -0,0 +1,554 @@ +//! Tune embed_threshold values for locale stubs. +//! +//! Two modes: +//! - **Tune** (default): compute optimal thresholds per locale entry +//! - **Audit** (`--audit`): surface entries with low discrimination — +//! where the description doesn't clearly separate this way from others +//! +//! Parallelized: uses all cores minus 4, one way per thread. 
+ +use anyhow::{bail, Context, Result}; +use std::collections::BTreeMap; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::{Arc, Mutex}; +use walkdir::WalkDir; + +use crate::frontmatter; +use crate::util::home_dir; + +/// A confuser: a non-self way that scores close to self. +#[derive(Clone)] +struct Confuser { + way_id: String, + score: f64, +} + +#[derive(Clone)] +struct TuneResult { + way_id: String, + lang: String, + current: f64, + optimal: f64, + best_self: f64, + best_non_self: Option, + /// Gap between self and best non-self (discrimination signal) + gap: f64, + changed: bool, + /// Top 3 closest non-self ways + confusers: Vec, +} + +struct WayTuneResult { + way_id: String, + locale_path: PathBuf, + results: Vec, + tuned_entries: Vec, + original_entries: Vec, +} + +pub fn run( + ways_dir: Option, + way_filter: Option, + apply: bool, + audit: bool, + audit_threshold: f64, + margin: f64, + json_output: bool, +) -> Result<()> { + let global_dir = ways_dir + .map(PathBuf::from) + .unwrap_or_else(|| home_dir().join(".claude/hooks/ways")); + let xdg_way = xdg_cache_dir().join("claude-ways/user"); + + let multi_corpus = xdg_way.join("ways-corpus-multi.jsonl"); + let multi_model = xdg_way.join("multilingual-minilm-l12-v2-q8.gguf"); + + if !multi_corpus.is_file() { + bail!("Multilingual corpus not found. Run `ways corpus` first."); + } + if !multi_model.is_file() { + bail!("Multilingual model not found. Run `make setup` first."); + } + + let embed_bin = find_way_embed() + .context("way-embed binary not found. 
Run `make setup` to install.")?; + + let excluded = crate::util::load_excluded_segments(); + + // Collect all .locales.jsonl files + let mut locale_files: Vec<(String, PathBuf)> = Vec::new(); + for entry in WalkDir::new(&global_dir) + .follow_links(true) + .into_iter() + .filter_map(|e| e.ok()) + { + let path = entry.path(); + if !path.is_file() { + continue; + } + let fname = path.file_name().and_then(|n| n.to_str()).unwrap_or(""); + if !fname.ends_with(".locales.jsonl") { + continue; + } + if crate::util::is_excluded_path(path, &excluded) { + continue; + } + + let parent = path.parent().unwrap_or(Path::new("")); + let rel = parent.strip_prefix(&global_dir).unwrap_or(parent); + let way_id = rel.display().to_string(); + + if let Some(ref filter) = way_filter { + if !way_id.contains(filter.as_str()) { + continue; + } + } + + locale_files.push((way_id, path.to_path_buf())); + } + locale_files.sort_by(|a, b| a.0.cmp(&b.0)); + + if locale_files.is_empty() { + if way_filter.is_some() { + bail!("No .locales.jsonl files matched filter"); + } + bail!("No .locales.jsonl files found"); + } + + // Parallelism: all cores minus 4 (leave headroom), minimum 1 + let n_cores = std::thread::available_parallelism() + .map(|n| n.get()) + .unwrap_or(4); + let n_workers = n_cores.saturating_sub(4).max(1); + + let total_ways = locale_files.len(); + eprintln!( + "{} {} ways across {} threads...", + if audit { "Auditing" } else { "Tuning" }, + total_ways, + n_workers + ); + + // Shared state + let work_queue: Arc>> = Arc::new(Mutex::new(locale_files)); + let completed: Arc> = Arc::new(Mutex::new(0)); + let failed: Arc> = Arc::new(Mutex::new(0)); + let all_results: Arc>> = Arc::new(Mutex::new(Vec::new())); + + // Spawn workers + let mut handles = Vec::new(); + for _ in 0..n_workers { + let queue = Arc::clone(&work_queue); + let results = Arc::clone(&all_results); + let completed = Arc::clone(&completed); + let failed = Arc::clone(&failed); + let embed_bin = embed_bin.clone(); + let 
multi_corpus = multi_corpus.clone(); + let multi_model = multi_model.clone(); + + let handle = std::thread::spawn(move || { + loop { + let item = { + let mut q = queue.lock().unwrap(); + q.pop() + }; + + let (way_id, locale_path) = match item { + Some(i) => i, + None => break, + }; + + match tune_way(&way_id, &locale_path, &embed_bin, &multi_corpus, &multi_model, margin) { + Ok(result) => { + let mut r = results.lock().unwrap(); + r.push(result); + } + Err(e) => { + eprintln!("\nERROR tuning {}: {}", way_id, e); + let mut f = failed.lock().unwrap(); + *f += 1; + } + } + + let done = { + let mut c = completed.lock().unwrap(); + *c += 1; + *c + }; + eprint!("\r {}/{} ways", done, total_ways); + } + }); + handles.push(handle); + } + + for h in handles { + h.join().unwrap(); + } + eprintln!(); + + let fail_count = *failed.lock().unwrap(); + if fail_count > 0 { + eprintln!("WARNING: {} ways failed to tune", fail_count); + } + + // Collect and sort + let mut way_results = Arc::try_unwrap(all_results) + .map_err(|_| anyhow::anyhow!("failed to unwrap results")) + .unwrap() + .into_inner() + .unwrap(); + way_results.sort_by(|a, b| a.way_id.cmp(&b.way_id)); + + let mut all_tune_results: Vec = Vec::new(); + let mut files_to_update: BTreeMap> = BTreeMap::new(); + + for wr in &way_results { + all_tune_results.extend(wr.results.clone()); + + let any_changed = wr.tuned_entries.iter().any(|e| { + let orig = wr.original_entries.iter().find(|o| o.lang == e.lang); + orig.map_or(false, |o| o.embed_threshold != e.embed_threshold) + }); + if any_changed { + files_to_update.insert(wr.locale_path.clone(), wr.tuned_entries.clone()); + } + } + + // Output + if audit { + output_audit(&all_tune_results, audit_threshold, json_output)?; + } else { + output_tune(&all_tune_results, apply, json_output)?; + } + + // Apply if requested (tune mode only) + if !audit && apply { + let changed_count = all_tune_results.iter().filter(|r| r.changed).count(); + if changed_count > 0 { + for (path, entries) in 
&files_to_update { + let mut lines: Vec = Vec::new(); + for entry in entries { + lines.push(serde_json::to_string(entry)?); + } + lines.sort(); + let content = lines.join("\n") + "\n"; + std::fs::write(path, content) + .with_context(|| format!("writing {}", path.display()))?; + } + eprintln!("Updated {} .locales.jsonl files", files_to_update.len()); + eprintln!("Run `ways corpus` to regenerate the corpus with tuned thresholds."); + } + } + + Ok(()) +}

/// Serialize an entry's confusers as `{"way": ..., "score": ...}` objects.
///
/// Shared by the tune and audit JSON outputs so both emit the same shape.
fn confusers_json(confusers: &[Confuser]) -> Vec<serde_json::Value> {
    confusers
        .iter()
        .map(|c| serde_json::json!({"way": c.way_id, "score": c.score}))
        .collect()
}

/// Standard tune output: threshold table.
///
/// With `json_output` set, prints every result as one pretty-printed JSON array on stdout and
/// nothing else. Otherwise prints a table (one row per way/language entry), a summary line, and,
/// when something would change and `apply` is false, a hint to re-run with `--apply`.
///
/// # Errors
///
/// Returns an error only if JSON serialization fails.
fn output_tune(results: &[TuneResult], apply: bool, json_output: bool) -> Result<()> {
    let changed_count = results.iter().filter(|r| r.changed).count();

    if json_output {
        let json_results: Vec<serde_json::Value> = results
            .iter()
            .map(|r| {
                serde_json::json!({
                    "way": r.way_id,
                    "lang": r.lang,
                    "current": r.current,
                    "optimal": r.optimal,
                    "self_score": r.best_self,
                    "best_non_self": r.best_non_self,
                    "gap": r.gap,
                    "changed": r.changed,
                    "confusers": confusers_json(&r.confusers),
                })
            })
            .collect();
        println!("{}", serde_json::to_string_pretty(&json_results)?);
        return Ok(());
    }

    let mut table =
        crate::table::Table::new(&["Way", "Lang", "Current", "Optimal", "Self", "Noise", "Gap", "Δ"]);
    table.max_width(0, 40);

    for r in results {
        // Only show a signed delta for rows whose threshold would actually move.
        let delta = if r.changed {
            format!("{:+.2}", r.optimal - r.current)
        } else {
            "—".to_string()
        };
        let noise = r
            .best_non_self
            .map_or_else(|| "—".to_string(), |s| format!("{:.4}", s));

        table.add(vec![
            &r.way_id,
            &r.lang,
            &format!("{:.2}", r.current),
            &format!("{:.2}", r.optimal),
            &format!("{:.4}", r.best_self),
            &noise,
            &format!("{:.2}", r.gap),
            &delta,
        ]);
    }
    table.print();

    println!();
    println!("{} entries analyzed, {} would change", results.len(), changed_count);

    if changed_count > 0 && !apply {
        println!();
        println!("Run with --apply to write tuned thresholds to .locales.jsonl files.");
        println!("Then run `ways corpus` to regenerate the corpus.");
    }

    Ok(())
}

/// Audit output: surface low-discrimination entries with confusers.
///
/// An entry is flagged when its `gap` is below `min_gap`. With `json_output` set, the flagged
/// entries are printed as a JSON array ordered by ascending gap. Otherwise a report is printed
/// in which each way appears once, ways ordered by their worst (smallest) gap and each way's
/// entries ordered by ascending gap, followed by a summary line.
///
/// # Errors
///
/// Returns an error only if JSON serialization fails.
fn output_audit(results: &[TuneResult], min_gap: f64, json_output: bool) -> Result<()> {
    let mut flagged: Vec<&TuneResult> = results
        .iter()
        .filter(|r| r.gap < min_gap)
        .collect();
    flagged.sort_by(|a, b| a.gap.partial_cmp(&b.gap).unwrap_or(std::cmp::Ordering::Equal));

    if json_output {
        let json_results: Vec<serde_json::Value> = flagged
            .iter()
            .map(|r| {
                serde_json::json!({
                    "way": r.way_id,
                    "lang": r.lang,
                    "gap": r.gap,
                    "self_score": r.best_self,
                    "best_non_self": r.best_non_self,
                    "confusers": confusers_json(&r.confusers),
                })
            })
            .collect();
        println!("{}", serde_json::to_string_pretty(&json_results)?);
        return Ok(());
    }

    if flagged.is_empty() {
        println!("No entries with discrimination gap < {:.2}", min_gap);
        println!("All locale descriptions have clear separation from neighbors.");
        return Ok(());
    }

    println!("Discrimination Audit");
    println!("====================");
    println!();
    println!(
        "{} of {} entries have gap < {:.2} (ambiguous — description doesn't clearly",
        flagged.len(),
        results.len(),
        min_gap,
    );
    println!("separate this way from others. Consider revising description/vocabulary.)");
    println!();

    // Group by way for cleaner output. The list is sorted by gap only, so entries of one way
    // can be interleaved with other ways; rank each way by its first (worst) appearance and
    // stable-sort on that rank so every way header is printed exactly once.
    let mut way_rank: std::collections::HashMap<&str, usize> = std::collections::HashMap::new();
    for r in flagged.iter().copied() {
        let next_rank = way_rank.len();
        way_rank.entry(r.way_id.as_str()).or_insert(next_rank);
    }
    flagged.sort_by_key(|r| way_rank[r.way_id.as_str()]);

    let mut current_way = "";
    for r in &flagged {
        if r.way_id != current_way {
            if !current_way.is_empty() {
                println!();
            }
            current_way = r.way_id.as_str();
            println!(" {} ", current_way);
        }

        let confuser_str = r
            .confusers
            .iter()
            .map(|c| format!("{} ({:.2})", c.way_id, c.score))
            .collect::<Vec<_>>()
            .join(", ");

        println!(
            " {} — gap {:.2} (self {:.2}, noise {:.2}) confused with: {}",
            r.lang,
            r.gap,
            r.best_self,
            r.best_non_self.unwrap_or(0.0),
            confuser_str,
        );
    }

    println!();
    println!("To fix: revise the description or vocabulary in the .locales.jsonl to");
    println!("better distinguish from confusers. Then re-run `ways tune` to update thresholds.");

    // Summary stats
    let total = results.len();
    let clear = total - flagged.len();
    println!();
    println!(
        "Summary: {} clear, {} ambiguous, {} total",
        clear,
        flagged.len(),
        total
    );

    Ok(())
}

+/// Tune all locale entries for a single way.
+fn tune_way( + way_id: &str, + locale_path: &Path, + embed_bin: &Path, + multi_corpus: &Path, + multi_model: &Path, + margin: f64, +) -> Result { + let entries = frontmatter::parse_locales_jsonl(locale_path)?; + let mut results: Vec = Vec::new(); + let mut tuned_entries: Vec = Vec::new(); + + for entry in &entries { + let query = format!( + "{} {}", + entry.description, + entry.vocabulary.as_deref().unwrap_or("") + ); + + let output = Command::new(embed_bin) + .args([ + "match", + "--corpus", + multi_corpus.to_str().unwrap(), + "--model", + multi_model.to_str().unwrap(), + "--query", + &query, + "--threshold", + "-1", + ]) + .output() + .with_context(|| format!("way-embed match for {}/{}", way_id, entry.lang))?; + + if !output.status.success() { + tuned_entries.push(entry.clone()); + continue; + } + + let stdout = String::from_utf8_lossy(&output.stdout); + let mut self_scores: Vec = Vec::new(); + let mut non_self_entries: Vec<(String, f64)> = Vec::new(); + + for line in stdout.lines() { + let parts: Vec<&str> = line.split('\t').collect(); + if parts.len() < 2 { + continue; + } + let id = parts[0]; + let score: f64 = match parts[1].parse() { + Ok(s) => s, + Err(_) => continue, + }; + + if id == way_id { + self_scores.push(score); + } else { + non_self_entries.push((id.to_string(), score)); + } + } + + // Sort non-self by score descending, dedup by way_id (keep best) + non_self_entries.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)); + let mut seen = std::collections::HashSet::new(); + let mut top_confusers: Vec = Vec::new(); + for (id, score) in &non_self_entries { + if seen.insert(id.clone()) { + top_confusers.push(Confuser { + way_id: id.clone(), + score: *score, + }); + if top_confusers.len() >= 3 { + break; + } + } + } + + let best_self = self_scores.iter().cloned().fold(f64::NEG_INFINITY, f64::max); + let best_non_self = non_self_entries + .first() + .map(|(_, s)| *s) + .unwrap_or(f64::NEG_INFINITY); + + let gap = if best_non_self > 
f64::NEG_INFINITY { + best_self - best_non_self + } else { + best_self // no confusers at all = perfect discrimination + }; + + let optimal = if best_non_self > f64::NEG_INFINITY { + (best_non_self + margin).min(best_self - 0.01) + } else { + 0.15 + }; + + let optimal = optimal.max(0.10).min(0.90); + let optimal = (optimal * 100.0).round() / 100.0; + + let current = entry.embed_threshold.unwrap_or(0.25); + let changed = (optimal - current).abs() > 0.005; + + results.push(TuneResult { + way_id: way_id.to_string(), + lang: entry.lang.clone(), + current, + optimal, + best_self, + best_non_self: if best_non_self > f64::NEG_INFINITY { + Some(best_non_self) + } else { + None + }, + gap, + changed, + confusers: top_confusers, + }); + + let mut tuned = entry.clone(); + if changed { + tuned.embed_threshold = Some(optimal); + } + tuned_entries.push(tuned); + } + + Ok(WayTuneResult { + way_id: way_id.to_string(), + locale_path: locale_path.to_path_buf(), + results, + tuned_entries, + original_entries: entries, + }) +} + +fn find_way_embed() -> Option { + let xdg = xdg_cache_dir().join("claude-ways/user/way-embed"); + if xdg.is_file() { + return Some(xdg); + } + let bin = home_dir().join(".claude/bin/way-embed"); + if bin.is_file() { + return Some(bin); + } + None +} + +fn xdg_cache_dir() -> PathBuf { + std::env::var("XDG_CACHE_HOME") + .map(PathBuf::from) + .unwrap_or_else(|_| home_dir().join(".cache")) +} diff --git a/tools/ways-cli/src/frontmatter.rs b/tools/ways-cli/src/frontmatter.rs index b2c6b00e..e4fbe5a0 100644 --- a/tools/ways-cli/src/frontmatter.rs +++ b/tools/ways-cli/src/frontmatter.rs @@ -50,6 +50,33 @@ fn extract_frontmatter_str(content: &str) -> Option { None } +/// A single locale entry from a .locales.jsonl file. 
#[derive(Debug, Clone, Deserialize, serde::Serialize)]
pub struct LocaleEntry {
    /// Language code of this stub (the `lang` key of the JSONL object).
    pub lang: String,
    /// Description text in the target language.
    pub description: String,
    /// Optional vocabulary terms; omitted from the serialized form when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub vocabulary: Option<String>,
    /// Optional per-entry threshold override; omitted from the serialized form when absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub embed_threshold: Option<f64>,
}

/// Parse a .locales.jsonl file into locale entries.
///
/// Each non-blank line must be one JSON object deserializable as [`LocaleEntry`]; blank and
/// whitespace-only lines are skipped. Entries are returned in file order.
///
/// # Errors
///
/// Fails if the file cannot be read, or on the first line that does not parse; the error
/// context names the file and the 1-based line number.
pub fn parse_locales_jsonl(path: &Path) -> Result<Vec<LocaleEntry>> {
    let content = std::fs::read_to_string(path)
        .with_context(|| format!("reading {}", path.display()))?;

    content
        .lines()
        .enumerate()
        .filter(|(_, line)| !line.trim().is_empty())
        .map(|(idx, line)| {
            serde_json::from_str(line).with_context(|| {
                format!("parsing locale entry in {} (line {})", path.display(), idx + 1)
            })
        })
        .collect()
}
+ /// Extract the `` comment from the body of a way file. pub fn extract_epistemic(content: &str) -> Option { for line in content.lines() { diff --git a/tools/ways-cli/src/main.rs b/tools/ways-cli/src/main.rs index 57a1ec60..27fc7892 100644 --- a/tools/ways-cli/src/main.rs +++ b/tools/ways-cli/src/main.rs @@ -137,6 +137,9 @@ enum Commands { /// Filter to ways supporting this language (code or name) #[arg(long)] filter: Option, + /// Show full per-way coverage detail (default shows uncovered summary) + #[arg(long)] + audit: bool, /// Machine-readable JSON output #[arg(long)] json: bool, @@ -211,6 +214,30 @@ enum Commands { #[arg(long)] confirm: bool, }, + /// Tune embed_threshold values for locale stubs based on corpus similarity + Tune { + /// Ways root directory (default: ~/.claude/hooks/ways) + #[arg(long)] + ways_dir: Option, + /// Filter to ways matching this substring (e.g., "security", "ea/") + #[arg(long)] + way: Option, + /// Write tuned thresholds to .locales.jsonl files + #[arg(long)] + apply: bool, + /// Discrimination audit — flag entries with ambiguous descriptions + #[arg(long)] + audit: bool, + /// Minimum gap for audit (entries below this are flagged,
default: 0.15) + #[arg(long, default_value = "0.15")] + audit_threshold: f64, + /// Margin above best non-self score (default: 0.03) + #[arg(long, default_value = "0.03")] + margin: f64, + /// Machine-readable JSON output + #[arg(long)] + json: bool, + }, /// Governance provenance queries — report, trace, control, policy, gaps, stale, active, matrix, lint Governance { #[command(subcommand)] @@ -399,7 +426,7 @@ fn main() -> Result<()> { Commands::Tree { path, jaccard } => cmd::tree::run(path, jaccard), Commands::Provenance { ways_dir } => cmd::provenance::run(ways_dir), Commands::Init { project } => cmd::init::run(project.as_deref()), - Commands::Language { filter, json } => cmd::language::run(filter.as_deref(), json), + Commands::Language { filter, audit, json } => cmd::language::run(filter.as_deref(), audit, json), Commands::Stats { days, project, json, global } => { cmd::stats::run(days, project.as_deref(), json, global) } @@ -443,6 +470,9 @@ fn main() -> Result<()> { } }, Commands::Suggest { file, min_freq } => cmd::suggest::run(file, min_freq), + Commands::Tune { ways_dir, way, apply, audit, audit_threshold, margin, json } => { + cmd::tune::run(ways_dir, way, apply, audit, audit_threshold, margin, json) + } Commands::Reset { session, all, confirm } => { cmd::reset::run(session.as_deref(), all, confirm) } diff --git a/tools/ways-cli/src/util.rs b/tools/ways-cli/src/util.rs index a20f990e..fb2f9bbd 100644 --- a/tools/ways-cli/src/util.rs +++ b/tools/ways-cli/src/util.rs @@ -49,6 +49,87 @@ pub fn load_excluded_segments() -> Vec { .unwrap_or_default() } +/// Extract a locale code from a filename like "security.ja.md" → Some("ja"). +/// Validates against languages.json to avoid false matches (e.g., "foo.setup.md"). 
+pub fn extract_locale_from_filename(filename: &str) -> Option { + if filename.contains(".check.") { + return None; + } + let stem = filename.strip_suffix(".md")?; + let parts: Vec<&str> = stem.split('.').collect(); + if parts.len() >= 2 { + let candidate = parts[parts.len() - 1]; + if candidate.len() >= 2 + && candidate.len() <= 5 + && candidate.chars().all(|c| c.is_ascii_lowercase() || c == '-') + { + // Validate against languages.json + let parsed: serde_json::Value = + serde_json::from_str(crate::agents::LANGUAGES_JSON).ok()?; + if parsed.get("languages")?.as_object()?.contains_key(candidate) { + return Some(candidate.to_string()); + } + } + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn valid_locale_codes() { + assert_eq!(extract_locale_from_filename("security.ja.md"), Some("ja".to_string())); + assert_eq!(extract_locale_from_filename("security.de.md"), Some("de".to_string())); + assert_eq!(extract_locale_from_filename("security.ar.md"), Some("ar".to_string())); + assert_eq!(extract_locale_from_filename("security.es.md"), Some("es".to_string())); + assert_eq!(extract_locale_from_filename("security.pt-br.md"), Some("pt-br".to_string())); + assert_eq!(extract_locale_from_filename("security.zh-tw.md"), Some("zh-tw".to_string())); + } + + #[test] + fn rejects_non_locale_dotted_names() { + // "setup" is not a language code + assert_eq!(extract_locale_from_filename("foo.setup.md"), None); + // "test" is not a language code + assert_eq!(extract_locale_from_filename("bar.test.md"), None); + // "main" is not a language code + assert_eq!(extract_locale_from_filename("way.main.md"), None); + } + + #[test] + fn rejects_check_files() { + assert_eq!(extract_locale_from_filename("security.check.md"), None); + assert_eq!(extract_locale_from_filename("security.ja.check.md"), None); + } + + #[test] + fn rejects_non_md_extensions() { + assert_eq!(extract_locale_from_filename("security.ja.yaml"), None); + 
assert_eq!(extract_locale_from_filename("security.ja.sh"), None); + } + + #[test] + fn rejects_plain_way_files() { + // No dot-separated locale segment + assert_eq!(extract_locale_from_filename("security.md"), None); + assert_eq!(extract_locale_from_filename("briefing.md"), None); + } + + #[test] + fn rejects_uppercase_and_numbers() { + assert_eq!(extract_locale_from_filename("way.EN.md"), None); + assert_eq!(extract_locale_from_filename("way.j2.md"), None); + } + + #[test] + fn handles_deeply_dotted_names() { + // Last segment is the locale candidate + assert_eq!(extract_locale_from_filename("some.way.name.ja.md"), Some("ja".to_string())); + } +} + /// Check if a path should be excluded based on schema-defined segments. pub fn is_excluded_path(path: &Path, excluded_segments: &[String]) -> bool { let path_str = match path.to_str() { diff --git a/tools/ways-cli/tests/session_sim.rs b/tools/ways-cli/tests/session_sim.rs index 2d03779e..11a08d68 100644 --- a/tools/ways-cli/tests/session_sim.rs +++ b/tools/ways-cli/tests/session_sim.rs @@ -199,7 +199,7 @@ fn clean_markers(session_id: &str) { // ── Assertion helpers ────────────────────────────────────────── fn assert_marker_exists(way_id: &str, session_id: &str) { - let path = format!("{}/{session_id}/ways/{way_id}/.marker", sessions_root()); + let path = format!("{}/{session_id}/ways/{way_id}/.marker.main", sessions_root()); assert!( Path::new(&path).exists(), "Expected marker for '{way_id}' but it doesn't exist at {path}" @@ -207,7 +207,7 @@ fn assert_marker_exists(way_id: &str, session_id: &str) { } fn assert_marker_absent(way_id: &str, session_id: &str) { - let path = format!("{}/{session_id}/ways/{way_id}/.marker", sessions_root()); + let path = format!("{}/{session_id}/ways/{way_id}/.marker.main", sessions_root()); assert!( !Path::new(&path).exists(), "Expected NO marker for '{way_id}' but found one at {path}"