Skip to content

One question: how do I do incremental learning in Drain3 training? #97

@CH-nolyn

Description

@CH-nolyn
import json
import logging
import sys
import time

from drain3 import TemplateMiner
from drain3.file_persistence import FilePersistence

from util.config_reader import initialize_template_config
from util.httpserver_operation import training_post_model


def _lines_per_second(line_total, elapsed_seconds):
    """Return the throughput in lines/sec, or ``inf`` when no time elapsed."""
    # A coarse clock (or an empty/tiny input) can yield a zero interval;
    # avoid ZeroDivisionError in that case.
    if elapsed_seconds <= 0:
        return float("inf")
    return line_total / elapsed_seconds


def process_log_training(raw_log_path, query_data):
    """Train a Drain3 ``TemplateMiner`` on a raw log file and post the state file.

    The miner is created with a ``FilePersistence`` handler pointing at
    ``"<scenario>/drain3_state.bin"``. Every line of ``raw_log_path`` is fed to
    ``add_log_message``; progress is logged every 10000 lines and each cluster
    change is logged as JSON. After the file is consumed, the clusters (largest
    first), the prefix tree and the profiler report are emitted, and the state
    file path is passed to ``training_post_model``.

    NOTE(review): per the Drain3 documentation, a ``TemplateMiner`` built with a
    persistence handler restores any previously saved state on construction, so
    calling this function again with the same scenario should continue training
    from the earlier model (incremental learning) -- confirm against the
    installed Drain3 version.

    Args:
        raw_log_path: Path of the UTF-8 encoded log file, one message per line.
        query_data: Mapping that must contain the key ``"scenario"``; its value
            is used as the directory part of the state file path.

    Raises:
        KeyError: If ``query_data`` has no ``"scenario"`` entry.
        OSError: If ``raw_log_path`` cannot be opened.
    """
    logger = logging.getLogger(__name__)
    logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(message)s')
    scenario = query_data["scenario"]
    output_file = f"{scenario}/drain3_state.bin"
    persistence = FilePersistence(output_file)

    template_miner = TemplateMiner(persistence, config=initialize_template_config(profiling_enabled=True))

    batch_size = 10000
    line_count = 0
    # perf_counter is monotonic, so intervals cannot go negative if the wall
    # clock is adjusted while training runs.
    start_time = time.perf_counter()
    batch_start_time = start_time

    # Train line by line, streaming the file instead of loading it all into
    # memory (log files can be very large).
    with open(raw_log_path, encoding='utf-8') as f:
        for line_count, raw_line in enumerate(f, start=1):
            line = raw_line.rstrip()
            result = template_miner.add_log_message(line)

            if line_count % batch_size == 0:
                now = time.perf_counter()
                rate = _lines_per_second(batch_size, now - batch_start_time)
                logger.info(f"Processing line: {line_count}, rate {rate:.1f} lines/sec, "
                            f"{len(template_miner.drain.clusters)} clusters so far.")
                batch_start_time = time.perf_counter()

            # Only report lines that created or modified a cluster.
            if result["change_type"] != "none":
                result_json = json.dumps({
                    result["cluster_id"]: {
                        "template_mined": result["template_mined"]
                    }
                })
                logger.info(f"Input ({line_count}): " + line)
                logger.info("Result: " + result_json)

    time_took = time.perf_counter() - start_time
    rate = _lines_per_second(line_count, time_took)
    logger.info(
        f"--- Done processing file in {time_took:.2f} sec. Total of {line_count} lines, rate {rate:.1f} lines/sec, "
        f"{len(template_miner.drain.clusters)} clusters")

    # Largest clusters first.
    sorted_clusters = sorted(template_miner.drain.clusters, key=lambda it: it.size, reverse=True)
    for cluster in sorted_clusters:
        logger.info(cluster)

    print("Prefix Tree:")
    template_miner.drain.print_tree()
    template_miner.profiler.report(0)

    training_post_model(output_file)

This is my training code. How can I train on new logs starting from a previously trained model?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions