Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
0013139
ARD Grouping Structure test
codingPF Dec 20, 2025
d829e47
init 2.0
codingPF Dec 23, 2025
12627e2
init 2.0 dependency
codingPF Dec 23, 2025
c939829
update datasource and config
codingPF Dec 23, 2025
14891e4
update
codingPF Dec 29, 2025
243e12c
update zdf junit
codingPF Dec 30, 2025
474b45b
Update last seen logic
codingPF Jan 2, 2026
8bb41f7
update filter logic and av. check
codingPF Jan 2, 2026
f84cc5a
fix db retrieve and add more logging
codingPF Jan 3, 2026
433605b
move logic for daysToCrawl into util method
codingPF Jan 5, 2026
8857f33
remove livestream from diff list
codingPF Jan 5, 2026
0faabee
fix availability check
codingPF Jan 5, 2026
b6da2c9
add column
codingPF Jan 5, 2026
45c8d35
DB config via yaml, move atomic filmlist, backwards compatibility
codingPF Jan 9, 2026
582ca85
zdf partner2sender, ard generate urls, checkUrlAv
codingPF Jan 23, 2026
b0d7936
Merge branch 'feature/ArdGroupStructure' into develop3
codingPF Jan 24, 2026
75f36a5
Merge branch 'feature/ArdGroupStructure' into develop3
codingPF Jan 25, 2026
7b2199e
update ard crawler
codingPF Jan 25, 2026
cad68da
add more url sources #1123
codingPF Feb 14, 2026
81e7884
remove stacktrace from srf website url rebuild, limit number of films
codingPF Feb 16, 2026
bc19de5
typo
codingPF Feb 16, 2026
e659c60
http 429 in jsonresttask
codingPF Feb 20, 2026
3b0398f
kika geo, dw old resolutions, ard fix typo, testcase and remove related
codingPF Feb 21, 2026
a94230a
Merge branch 'develop' into develop2
pidoubleyou Mar 1, 2026
129e281
add ratelimiter, cleanup comments, fix ard test, DW backwards compat fix
codingPF Mar 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 43 additions & 20 deletions MServer-Config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
#### Server configurations ####

# Film DB
databaseConfig:
active: false
url: "jdbc:postgresql://localhost:55432/crawler"
username: "crawler"
password: "secret"
refreshIntervalInDays: 7
checkUrlIntervalInDays: 3
batchSize: 2000

# The maximum number of CPU threads to be used.
maximumCpuThreads: 10

Expand All @@ -17,17 +27,16 @@ maximumRequestsPerSecond: 999.0

# If set, only these senders will be crawled; all others will be ignored.
senderIncluded:
#- ARD
- ARTE_DE
- ARTE_FR
- ARTE_PL
- ARTE_IT
- ARTE_ES
- ARTE_EN
- ARD
#- ARTE_DE
#- ARTE_FR
#- ARTE_PL
#- ARTE_IT
#- ARTE_ES
#- ARTE_EN
#- DREISAT
#- FUNK
#- KIKA
# - DW
#- DW
#- ORF
#- PHOENIX
#- SRF
Expand Down Expand Up @@ -99,8 +108,8 @@ filmlistIdFilePath: target/filmlists/filmlist.id.xx

# import additional filmlist sources
importFilmlistConfigurations :
- active: false
path: "someCrawlerlist.json"
- active: true
path: "Filmliste-akt"
format: OLD_JSON
createDiff: false
checkImportListUrl: false
Expand All @@ -113,7 +122,17 @@ importFilmlistConfigurations :
path: "https://verteiler1.mediathekview.de/filme-org.xz"
format: OLD_JSON_COMPRESSED_XZ
createDiff: true
checkImportListUrl: true
checkImportListUrl: true
- active: false
path: "https://verteiler1.mediathekview.de/Filmliste-akt.xz"
format: OLD_JSON_COMPRESSED_XZ
createDiff: false
checkImportListUrl: false
- active: false
path: jdbc
format: OLD_JSON
createDiff: false
checkImportListUrl: false

# A film URL is considered invalid if its size is below the minSize.
checkImportListUrlMinSize: 5012
Expand Down Expand Up @@ -141,7 +160,7 @@ maximumSubpages: 5
maximumDaysForSendungVerpasstSection: 7

# The maximum number of days into the future that will be crawled for the "Sendung Verpasst?" section.
maximumDaysForSendungVerpasstSectionFuture: 0
maximumDaysForSendungVerpasstSectionFuture: 3

# The time in seconds before a socket connection should time out.
socketTimeoutInSeconds: 60
Expand All @@ -151,11 +170,12 @@ socketTimeoutInSeconds: 60
senderConfigurations:
ARD:
# Actually the ARD has a maximum of 6 days in the past
maximumDaysForSendungVerpasstSection: 1
maximumDaysForSendungVerpasstSection: 6
maximumDaysForSendungVerpasstSectionFuture: 6
#2,4,8 ok
maximumUrlsPerTask: 32
#10,20,40 ok
maximumSubpages: 0
maximumSubpages: 40
ORF:
maximumRequestsPerSecond: 10.0
ARTE_DE:
Expand All @@ -171,20 +191,21 @@ senderConfigurations:
ARTE_ES:
maximumSubpages: 6
KIKA:
maximumSubpages: 2
maximumSubpages: 4
maximumRequestsPerSecond: 8.0
ZDF:
maximumDaysForSendungVerpasstSection: 21
maximumRequestsPerSecond: 20
FUNK:
maximumUrlsPerTask: 99
DREISAT:
maximumSubpages: 5
maximumDaysForSendungVerpasstSection: 60
maximumSubpages: 15
maximumDaysForSendungVerpasstSection: 30
maximumDaysForSendungVerpasstSectionFuture: 20
PHOENIX:
maximumSubpages: 500
SRF:
maximumSubpages: 1
maximumSubpages: 25

#### COPY ####
copySettings:
Expand All @@ -206,6 +227,8 @@ copySettings:
# JSON_COMPRESSED: /var/www/mediathekview/filmlisten/filmliste_diff.json.xz
OLD_JSON_COMPRESSED_XZ: copyTarget/filmliste_old_diff.json.xz



#### Logging ####
logSettings:
# The log level for the console.
Expand Down Expand Up @@ -240,4 +263,4 @@ logSettings:

# The pattern of the file name of the archived log files.
# See: https://logging.apache.org/log4j/2.0/manual/appenders.html#RollingFileAppender
logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy}-%i.log
logFileRollingPattern: logs/${date:yyyy-MM}/server-%d{MM-dd-yyyy-HH}-%i.log
22 changes: 17 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@
<wiremock.version>2.35.2</wiremock.version>
<xz.version>1.10</xz.version>
<yacl4j-core.version>0.9.2</yacl4j-core.version>
<docker-maven-plugin.version>0.40.2</docker-maven-plugin.version>
<docker-maven-plugin.version>0.48.0</docker-maven-plugin.version>
<sortpom-maven-plugin.version>3.2.0</sortpom-maven-plugin.version>
</properties>

Expand Down Expand Up @@ -125,6 +125,20 @@

<dependencies>

<!-- PostgreSQL JDBC Driver -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>42.7.4</version>
</dependency>

<!-- HikariCP Connection Pool -->
<dependency>
<groupId>com.zaxxer</groupId>
<artifactId>HikariCP</artifactId>
<version>5.1.0</version>
</dependency>

<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
Expand Down Expand Up @@ -486,24 +500,22 @@
<build>
<plugins>
<!-- Build docker -->

<plugin>
<groupId>io.fabric8</groupId>
<artifactId>docker-maven-plugin</artifactId>
<version>${docker-maven-plugin.version}</version>
<configuration>
<images>
<images>
<image>
<name>mediathekview/mserver:${project.version}</name>

<build>
<from>eclipse-temurin:${maven.compiler.target}</from>
<maintainer>Nicklas Wiegandt &lt;nicklas@wiegandt.eu&gt;</maintainer>
<assembly>
<descriptor>docker-assembly.xml</descriptor>
</assembly>
<env>
<JAVA_OPTS>-Xmx4G</JAVA_OPTS>
<JAVA_OPTS>-Xmx8G</JAVA_OPTS>
<MSERVER_OPTS>config.yaml</MSERVER_OPTS>
</env>
<entryPoint>
Expand Down
62 changes: 62 additions & 0 deletions src/main/docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@

# Compose stack for the crawler: one PostgreSQL database plus three MServer
# crawler services that differ in the configuration / flow they are started with.

configs:
  # SQL executed from the postgres init directory (see the `configs` mount of
  # the postgresMV service). Column definitions must be separated by commas;
  # a stray '.' after a column makes the whole CREATE TABLE statement fail.
  init_sql:
    content: |
      CREATE TABLE IF NOT EXISTS filme (
        id TEXT PRIMARY KEY, -- eindeutige Film-ID
        data JSONB NOT NULL, -- JSON-Daten des Films
        created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
        last_seen TIMESTAMPTZ NOT NULL DEFAULT now(),
        last_update TIMESTAMPTZ NOT NULL DEFAULT now(),
        last_url_check TIMESTAMPTZ NOT NULL DEFAULT now()
      );

services:

  # Film database used by the crawler.
  postgresMV:
    image: postgres:18
    container_name: crawler-postgres
    environment:
      POSTGRES_DB: crawler
      POSTGRES_USER: crawler
      POSTGRES_PASSWORD: secret
      TZ: Europe/Berlin
    ports:
      # Published on host port 55432; container port is the postgres default 5432.
      - "55432:5432"
    volumes:
      # Persistent data
      - c:/tmp/pgdata:/var/lib/postgresql
    configs:
      - source: init_sql
        target: /docker-entrypoint-initdb.d/init.sql
    restart: unless-stopped

  # Crawler run using the R1 configuration.
  mserver-r1:
    image: mediathekview/mserver:4.0.1-SNAPSHOT
    environment:
      MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml
    volumes:
      - c:/tmp/filmlists:/filmlists
      - c:/tmp/logs:/logs
      - c:/tmp/hist:/hist
      - c:/tmp/config:/config

  # Crawler run using the R2 configuration.
  mserver-r2:
    image: mediathekview/mserver:4.0.1-SNAPSHOT
    environment:
      MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml
    volumes:
      - c:/tmp/filmlists:/filmlists
      - c:/tmp/logs:/logs
      - c:/tmp/hist:/hist
      - c:/tmp/config:/config

  # R2 configuration started with the checkAvailability flow.
  # NOTE(review): this service mounts /volume1/docker/Projekt-mv2/... while the
  # other services mount c:/tmp/... - confirm which host layout is intended.
  mserver-checkUrls:
    image: mediathekview/mserver:4.0.1-SNAPSHOT
    environment:
      MSERVER_OPTS: --config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability
    volumes:
      - /volume1/docker/Projekt-mv2/filmlists:/filmlists
      - /volume1/docker/Projekt-mv2/logs:/logs
      - /volume1/docker/Projekt-mv2/hist:/hist
      - /volume1/docker/Projekt-mv2/config:/config
19 changes: 19 additions & 0 deletions src/main/docker/runDocker
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# All docker commands used to operate the crawler stack.
# Every `docker compose run` target must be a service defined in docker-compose.yml.

# run PG
docker compose up -d postgresMV

# cron 01 01 long run
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R1.yaml" mserver-r1

# cron 06-22 1,31 short run
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml" mserver-r2

# cron 22 55 url check
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow checkAvailability" mserver-checkUrls

# on demand - do not run this unless you know what you are doing!
# Runs on the mserver-r2 service (same R2 configuration); the compose file
# defines no "mserver-r3" service, so targeting that name cannot start a container.
docker compose run -d --rm -e MSERVER_OPTS="--config https://mediathekview.github.io/MVCrawlerConfig/config/MServer-Config-R2.yaml --flow importFilmlistIntoDB" mserver-r2

## docker save -o mserver.tar mediathekview/mserver:4.0.1-SNAPSHOT
## docker load -i mserver.tar
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
/** A manager to load configurations. */
public abstract class ConfigManager<T extends ConfigDTO> {
private T config;
private static final Logger LOG = LogManager.getLogger(ConfigManager.class);
//private static final Logger LOG = LogManager.getLogger(ConfigManager.class);

Check warning on line 14 in src/main/java/de/mediathekview/mserver/base/config/ConfigManager.java

View check run for this annotation

SonarQubeCloud / SonarCloud Code Analysis

This block of commented-out lines of code should be removed.

See more on https://sonarcloud.io/project/issues?id=mediathekview_MServer&issues=AZztu-BJt5t5DmIV0CvW&open=AZztu-BJt5t5DmIV0CvW&pullRequest=1128

protected abstract String getConfigFileName();

Expand Down Expand Up @@ -58,7 +58,8 @@
}
}
} catch(Exception e) {
LOG.debug(e);
//LOG.debug(e);
e.printStackTrace();
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@

/** A POJO with the configs for MServer. */
public class MServerConfigDTO extends MServerBasicConfigDTO implements ConfigDTO {
private MServerDBConfig databaseConfig;
private final MServerCopySettings copySettings;
private final Boolean writeFilmlistHashFileEnabled;
private final String filmlistHashFilePath;
private final Boolean writeFilmlistIdFileEnabled;
private final String filmlistIdFilePath;

/** ignore certain film by title **/
private String ignoreFilmlistPath;
/** add livestreams from external list **/
Expand Down Expand Up @@ -48,6 +50,7 @@ public MServerConfigDTO() {
filmlistSavePaths = new EnumMap<>(FilmlistFormats.class);
filmlistDiffSavePaths = new EnumMap<>(FilmlistFormats.class);
copySettings = new MServerCopySettings();
databaseConfig = new MServerDBConfig();
logSettings = new MServerLogSettingsDTO();
crawlerURLs = new EnumMap<>(CrawlerUrlType.class);

Expand Down Expand Up @@ -242,7 +245,15 @@ public ImportLivestreamConfiguration getImportLivestreamConfiguration() {
/**
 * Returns the filmlist import configurations held by this config.
 *
 * @return the {@link ImportFilmlistConfiguration} entries, exactly as stored (no copy is made)
 */
public List<ImportFilmlistConfiguration> getImportFilmlistConfigurations() {
return importFilmlistConfigurations;
}


/**
 * Provides the database settings of this configuration.
 *
 * @return the {@link MServerDBConfig} currently assigned to this config
 */
public MServerDBConfig getDatabaseConfig() {
  return this.databaseConfig;
}

/**
 * Replaces the database settings of this configuration.
 *
 * @param databaseConfig the {@link MServerDBConfig} to store; it is assigned as-is without validation
 */
public void setDatabaseConfig(MServerDBConfig databaseConfig) {
this.databaseConfig = databaseConfig;
}

/**
* Loads the {@link Sender} specific configuration and if it not exist creates one.
*
Expand Down Expand Up @@ -286,7 +297,8 @@ public boolean equals(final Object o) {
&& Objects.equals(getFilmlistIdFilePath(), that.getFilmlistIdFilePath())
&& Objects.equals(getIgnoreFilmslistPath(), that.getIgnoreFilmslistPath())
&& Objects.equals(getImportLivestreamConfiguration(), that.getImportLivestreamConfiguration())
&& Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations());
&& Objects.equals(getImportFilmlistConfigurations(), that.getImportFilmlistConfigurations())
&& Objects.equals(getDatabaseConfig(), that.getDatabaseConfig());
}

@Override
Expand All @@ -313,7 +325,8 @@ public int hashCode() {
getFilmlistIdFilePath(),
getIgnoreFilmslistPath(),
getImportLivestreamConfiguration(),
getImportFilmlistConfigurations());
getImportFilmlistConfigurations(),
getDatabaseConfig());
}

public void initializeSenderConfigurations() {
Expand Down
Loading
Loading