Skip to content

Commit f734d8b

Browse files
authored
Revise start_fact_cache and finish_fact_cache to use JSON file (ansible#15970)
* Revise start_fact_cache and finish_fact_cache to use JSON file with host list inside it * Revise artifacts path to be relative to the job private_data_dir * Update calls to start_fact_cache and finish_fact_cache to agree with new reference to artifacts_dir * Prevents unnecessary updates to ansible_facts_modified, fixing timestamp-related test failures.
1 parent 872349a commit f734d8b

File tree

3 files changed

+125
-66
lines changed

3 files changed

+125
-66
lines changed

awx/main/tasks/facts.py

Lines changed: 64 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -22,43 +22,51 @@
2222

2323

2424
@log_excess_runtime(logger, debug_cutoff=0.01, msg='Inventory {inventory_id} host facts prepared for {written_ct} hosts, took {delta:.3f} s', add_log_data=True)
25-
def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=None):
25+
def start_fact_cache(hosts, artifacts_dir, timeout=None, inventory_id=None, log_data=None):
26+
log_data = log_data or {}
2627
log_data['inventory_id'] = inventory_id
2728
log_data['written_ct'] = 0
28-
hosts_cached = list()
29-
try:
30-
os.makedirs(destination, mode=0o700)
31-
except FileExistsError:
32-
pass
29+
hosts_cached = []
30+
31+
# Create the fact_cache directory inside artifacts_dir
32+
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
33+
os.makedirs(fact_cache_dir, mode=0o700, exist_ok=True)
3334

3435
if timeout is None:
3536
timeout = settings.ANSIBLE_FACT_CACHE_TIMEOUT
3637

37-
last_filepath_written = None
38+
last_write_time = None
39+
3840
for host in hosts:
39-
hosts_cached.append(host)
41+
hosts_cached.append(host.name)
4042
if not host.ansible_facts_modified or (timeout and host.ansible_facts_modified < now() - datetime.timedelta(seconds=timeout)):
4143
continue # facts are expired - do not write them
4244

43-
filepath = os.sep.join(map(str, [destination, host.name]))
44-
if not os.path.realpath(filepath).startswith(destination):
45-
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
45+
filepath = os.path.join(fact_cache_dir, host.name)
46+
if not os.path.realpath(filepath).startswith(fact_cache_dir):
47+
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
4648
continue
4749

4850
try:
4951
with codecs.open(filepath, 'w', encoding='utf-8') as f:
5052
os.chmod(f.name, 0o600)
5153
json.dump(host.ansible_facts, f)
5254
log_data['written_ct'] += 1
53-
last_filepath_written = filepath
55+
last_write_time = os.path.getmtime(filepath)
5456
except IOError:
55-
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
57+
logger.error(f'facts for host {smart_str(host.name)} could not be cached')
5658
continue
5759

58-
if last_filepath_written:
59-
return os.path.getmtime(last_filepath_written), hosts_cached
60-
61-
return None, hosts_cached
60+
# Write summary file directly to the artifacts_dir
61+
if inventory_id is not None:
62+
summary_file = os.path.join(artifacts_dir, 'host_cache_summary.json')
63+
summary_data = {
64+
'last_write_time': last_write_time,
65+
'hosts_cached': hosts_cached,
66+
'written_ct': log_data['written_ct'],
67+
}
68+
with open(summary_file, 'w', encoding='utf-8') as f:
69+
json.dump(summary_data, f, indent=2)
6270

6371

6472
@log_excess_runtime(
@@ -67,34 +75,54 @@ def start_fact_cache(hosts, destination, log_data, timeout=None, inventory_id=No
6775
msg='Inventory {inventory_id} host facts: updated {updated_ct}, cleared {cleared_ct}, unchanged {unmodified_ct}, took {delta:.3f} s',
6876
add_log_data=True,
6977
)
70-
def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job_id=None, inventory_id=None):
78+
def finish_fact_cache(artifacts_dir, job_id=None, inventory_id=None, log_data=None):
79+
log_data = log_data or {}
7180
log_data['inventory_id'] = inventory_id
7281
log_data['updated_ct'] = 0
7382
log_data['unmodified_ct'] = 0
7483
log_data['cleared_ct'] = 0
84+
# The summary file is directly inside the artifacts dir
85+
summary_path = os.path.join(artifacts_dir, 'host_cache_summary.json')
86+
if not os.path.exists(summary_path):
87+
logger.error(f'Missing summary file at {summary_path}')
88+
return
7589

76-
hosts_cached = sorted((h for h in hosts_cached if h.id is not None), key=lambda h: h.id)
77-
90+
try:
91+
with open(summary_path, 'r', encoding='utf-8') as f:
92+
summary = json.load(f)
93+
facts_write_time = os.path.getmtime(summary_path) # After successful read
94+
except (json.JSONDecodeError, OSError) as e:
95+
logger.error(f'Error reading summary file at {summary_path}: {e}')
96+
return
97+
98+
host_names = summary.get('hosts_cached', [])
99+
hosts_cached = Host.objects.filter(name__in=host_names).order_by('id').iterator()
100+
# Path where individual fact files were written
101+
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
78102
hosts_to_update = []
103+
79104
for host in hosts_cached:
80-
filepath = os.sep.join(map(str, [destination, host.name]))
81-
if not os.path.realpath(filepath).startswith(destination):
82-
system_tracking_logger.error('facts for host {} could not be cached'.format(smart_str(host.name)))
105+
filepath = os.path.join(fact_cache_dir, host.name)
106+
if not os.path.realpath(filepath).startswith(fact_cache_dir):
107+
logger.error(f'Invalid path for facts file: {filepath}')
83108
continue
109+
84110
if os.path.exists(filepath):
85111
# If the file changed since we wrote the last facts file, pre-playbook run...
86112
modified = os.path.getmtime(filepath)
87-
if (not facts_write_time) or modified > facts_write_time:
88-
with codecs.open(filepath, 'r', encoding='utf-8') as f:
89-
try:
113+
if not facts_write_time or modified >= facts_write_time:
114+
try:
115+
with codecs.open(filepath, 'r', encoding='utf-8') as f:
90116
ansible_facts = json.load(f)
91-
except ValueError:
92-
continue
117+
except ValueError:
118+
continue
119+
120+
if ansible_facts != host.ansible_facts:
93121
host.ansible_facts = ansible_facts
94122
host.ansible_facts_modified = now()
95123
hosts_to_update.append(host)
96-
system_tracking_logger.info(
97-
'New fact for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)),
124+
logger.info(
125+
f'New fact for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}',
98126
extra=dict(
99127
inventory_id=host.inventory.id,
100128
host_name=host.name,
@@ -104,6 +132,8 @@ def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job
104132
),
105133
)
106134
log_data['updated_ct'] += 1
135+
else:
136+
log_data['unmodified_ct'] += 1
107137
else:
108138
log_data['unmodified_ct'] += 1
109139
else:
@@ -112,9 +142,11 @@ def finish_fact_cache(hosts_cached, destination, facts_write_time, log_data, job
112142
host.ansible_facts = {}
113143
host.ansible_facts_modified = now()
114144
hosts_to_update.append(host)
115-
system_tracking_logger.info('Facts cleared for inventory {} host {}'.format(smart_str(host.inventory.name), smart_str(host.name)))
145+
logger.info(f'Facts cleared for inventory {smart_str(host.inventory.name)} host {smart_str(host.name)}')
116146
log_data['cleared_ct'] += 1
117-
if len(hosts_to_update) > 100:
147+
148+
if len(hosts_to_update) >= 100:
118149
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])
119150
hosts_to_update = []
151+
120152
bulk_update_sorted_by_id(Host, hosts_to_update, fields=['ansible_facts', 'ansible_facts_modified'])

awx/main/tasks/jobs.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1093,8 +1093,8 @@ def pre_run_hook(self, job, private_data_dir):
10931093
# where ansible expects to find it
10941094
if self.should_use_fact_cache():
10951095
job.log_lifecycle("start_job_fact_cache")
1096-
self.facts_write_time, self.hosts_with_facts_cached = start_fact_cache(
1097-
job.get_hosts_for_fact_cache(), os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'), inventory_id=job.inventory_id
1096+
self.hosts_with_facts_cached = start_fact_cache(
1097+
job.get_hosts_for_fact_cache(), artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)), inventory_id=job.inventory_id
10981098
)
10991099

11001100
def build_project_dir(self, job, private_data_dir):
@@ -1104,17 +1104,15 @@ def post_run_hook(self, job, status):
11041104
super(RunJob, self).post_run_hook(job, status)
11051105
job.refresh_from_db(fields=['job_env'])
11061106
private_data_dir = job.job_env.get('AWX_PRIVATE_DATA_DIR')
1107-
if (not private_data_dir) or (not hasattr(self, 'facts_write_time')):
1107+
if not private_data_dir:
11081108
# If there's no private data dir, that means we didn't get into the
11091109
# actual `run()` call; this _usually_ means something failed in
11101110
# the pre_run_hook method
11111111
return
11121112
if self.should_use_fact_cache() and self.runner_callback.artifacts_processed:
11131113
job.log_lifecycle("finish_job_fact_cache")
11141114
finish_fact_cache(
1115-
self.hosts_with_facts_cached,
1116-
os.path.join(private_data_dir, 'artifacts', str(job.id), 'fact_cache'),
1117-
facts_write_time=self.facts_write_time,
1115+
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(job.id)),
11181116
job_id=job.id,
11191117
inventory_id=job.inventory_id,
11201118
)
@@ -1580,7 +1578,7 @@ def build_args(self, inventory_update, private_data_dir, passwords):
15801578
# Include any facts from input inventories so they can be used in filters
15811579
start_fact_cache(
15821580
input_inventory.hosts.only(*HOST_FACTS_FIELDS),
1583-
os.path.join(private_data_dir, 'artifacts', str(inventory_update.id), 'fact_cache'),
1581+
artifacts_dir=os.path.join(private_data_dir, 'artifacts', str(inventory_update.id)),
15841582
inventory_id=input_inventory.id,
15851583
)
15861584

awx/main/tests/unit/models/test_jobs.py

Lines changed: 56 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# -*- coding: utf-8 -*-
22
import json
33
import os
4-
import time
5-
64
import pytest
75

86
from awx.main.models import (
@@ -15,6 +13,8 @@
1513

1614
from datetime import timedelta
1715

16+
import time
17+
1818

1919
@pytest.fixture
2020
def ref_time():
@@ -33,15 +33,23 @@ def hosts(ref_time):
3333

3434

3535
def test_start_job_fact_cache(hosts, tmpdir):
36-
fact_cache = os.path.join(tmpdir, 'facts')
37-
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
36+
# Create artifacts dir inside tmpdir
37+
artifacts_dir = tmpdir.mkdir("artifacts")
38+
39+
# Assign a mock inventory ID
40+
inventory_id = 42
41+
42+
# Call the function WITHOUT log_data — the decorator handles it
43+
start_fact_cache(hosts, artifacts_dir=str(artifacts_dir), timeout=0, inventory_id=inventory_id)
44+
45+
# Fact files are written into artifacts_dir/fact_cache/
46+
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
3847

3948
for host in hosts:
40-
filepath = os.path.join(fact_cache, host.name)
49+
filepath = os.path.join(fact_cache_dir, host.name)
4150
assert os.path.exists(filepath)
42-
with open(filepath, 'r') as f:
43-
assert f.read() == json.dumps(host.ansible_facts)
44-
assert os.path.getmtime(filepath) <= last_modified
51+
with open(filepath, 'r', encoding='utf-8') as f:
52+
assert json.load(f) == host.ansible_facts
4553

4654

4755
def test_fact_cache_with_invalid_path_traversal(tmpdir):
@@ -51,43 +59,63 @@ def test_fact_cache_with_invalid_path_traversal(tmpdir):
5159
ansible_facts={"a": 1, "b": 2},
5260
),
5361
]
62+
artifacts_dir = tmpdir.mkdir("artifacts")
63+
inventory_id = 42
5464

55-
fact_cache = os.path.join(tmpdir, 'facts')
56-
start_fact_cache(hosts, fact_cache, timeout=0)
57-
# a file called "foo" should _not_ be written outside the facts dir
58-
assert os.listdir(os.path.join(fact_cache, '..')) == ['facts']
65+
start_fact_cache(hosts, artifacts_dir=str(artifacts_dir), timeout=0, inventory_id=inventory_id)
66+
67+
# Fact cache directory (safe location)
68+
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
69+
70+
# The bad host name should not produce a file
71+
assert not os.path.exists(os.path.join(fact_cache_dir, '../foo'))
72+
73+
# Make sure the fact_cache dir exists and is still empty
74+
assert os.listdir(fact_cache_dir) == []
5975

6076

6177
def test_start_job_fact_cache_past_timeout(hosts, tmpdir):
6278
fact_cache = os.path.join(tmpdir, 'facts')
63-
# the hosts fixture was modified 5s ago, which is more than 2s
64-
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=2)
65-
assert last_modified is None
79+
start_fact_cache(hosts, fact_cache, timeout=2)
6680

6781
for host in hosts:
6882
assert not os.path.exists(os.path.join(fact_cache, host.name))
83+
ret = start_fact_cache(hosts, fact_cache, timeout=2)
84+
assert ret is None
6985

7086

7187
def test_start_job_fact_cache_within_timeout(hosts, tmpdir):
72-
fact_cache = os.path.join(tmpdir, 'facts')
73-
# the hosts fixture was modified 5s ago, which is less than 7s
74-
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=7)
75-
assert last_modified
88+
artifacts_dir = tmpdir.mkdir("artifacts")
7689

90+
# The hosts fixture was modified 5s ago, which is less than 7s
91+
start_fact_cache(hosts, str(artifacts_dir), timeout=7)
92+
93+
fact_cache_dir = os.path.join(artifacts_dir, 'fact_cache')
7794
for host in hosts:
78-
assert os.path.exists(os.path.join(fact_cache, host.name))
95+
filepath = os.path.join(fact_cache_dir, host.name)
96+
assert os.path.exists(filepath)
97+
with open(filepath, 'r') as f:
98+
assert json.load(f) == host.ansible_facts
7999

80100

81101
def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
82102
fact_cache = os.path.join(tmpdir, 'facts')
83-
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
103+
start_fact_cache(hosts, fact_cache, timeout=0)
84104

85105
bulk_update = mocker.patch('awx.main.tasks.facts.bulk_update_sorted_by_id')
106+
107+
# Mock the os.path.exists behavior for host deletion
108+
# Let's assume the fact file for hosts[1] is missing.
86109
mocker.patch('os.path.exists', side_effect=lambda path: hosts[1].name not in path)
87110

88-
# Simulate one host's fact file getting deleted
89-
os.remove(os.path.join(fact_cache, hosts[1].name))
90-
finish_fact_cache(hosts, fact_cache, last_modified)
111+
# Simulate one host's fact file getting deleted manually
112+
host_to_delete_filepath = os.path.join(fact_cache, hosts[1].name)
113+
114+
# Simulate the file being removed by checking existence first, to avoid FileNotFoundError
115+
if os.path.exists(host_to_delete_filepath):
116+
os.remove(host_to_delete_filepath)
117+
118+
finish_fact_cache(fact_cache)
91119

92120
# Simulate side effects that would normally be applied during bulk update
93121
hosts[1].ansible_facts = {}
@@ -102,12 +130,13 @@ def test_finish_job_fact_cache_clear(hosts, mocker, ref_time, tmpdir):
102130
assert hosts[1].ansible_facts == {}
103131
assert hosts[1].ansible_facts_modified > ref_time
104132

105-
bulk_update.assert_called_once_with(Host, [], fields=['ansible_facts', 'ansible_facts_modified'])
133+
# Current implementation skips the call entirely if hosts_to_update == []
134+
bulk_update.assert_not_called()
106135

107136

108137
def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
109138
fact_cache = os.path.join(tmpdir, 'facts')
110-
last_modified, _ = start_fact_cache(hosts, fact_cache, timeout=0)
139+
start_fact_cache(hosts, fact_cache, timeout=0)
111140

112141
bulk_update = mocker.patch('django.db.models.query.QuerySet.bulk_update')
113142

@@ -119,6 +148,6 @@ def test_finish_job_fact_cache_with_bad_data(hosts, mocker, tmpdir):
119148
new_modification_time = time.time() + 3600
120149
os.utime(filepath, (new_modification_time, new_modification_time))
121150

122-
finish_fact_cache(hosts, fact_cache, last_modified)
151+
finish_fact_cache(fact_cache)
123152

124153
bulk_update.assert_not_called()

0 commit comments

Comments
 (0)