Browse Source

Integrating speech and iterating on inference, or something like that.

master
Rob Colbert 3 months ago
parent
commit
96a9c5f7d9
  1. 69
      src/app/models/broadcast-show.ts
  2. 34
      src/app/models/episode.ts
  3. 4
      src/app/models/feed-item.ts
  4. 13
      src/app/models/feed.ts
  5. 29
      src/app/models/lib/speech-personality.ts
  6. 80
      src/app/models/video.ts
  7. 177
      src/app/services/feed.ts
  8. 175
      src/app/services/openai.ts
  9. 160
      src/app/services/video.ts
  10. 20
      src/config/env.ts
  11. 72
      src/speechgen.ts
  12. 139
      src/workers/newsroom/fetch-news.ts
  13. 7
      types/openai.d.ts

69
src/app/models/broadcast-show.ts

@ -0,0 +1,69 @@
// app/models/broadcast-show.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import { Schema, Types, model } from "mongoose";
import { IEpisode } from "./episode";
import { HumanGender } from "./lib/human-gender.ts";
import { ISpeechPersonality, SpeechPersonalitySchema } from "./lib/speech-personality.ts";
/**
 * Lifecycle states of a broadcast show. The string values are the ones
 * accepted by the `status` path of BroadcastShowSchema, which defaults to
 * `Offline`.
 */
export enum BroadcastShowStatus {
Offline = "offline",
Live = "live",
Retired = "retired",
Removed = "removed",
}
/**
 * A host of a broadcast show, as embedded in IBroadcastShow.hosts.
 */
export interface IBroadcastShowHost {
name: string;
description: string;
gender: HumanGender;
role: string;
// Speech settings (model, voice, role) used when generating this host's audio.
speech: ISpeechPersonality;
}
/**
 * Mongoose schema for a show host embedded in a broadcast show document.
 * Every path is required.
 */
export const BroadcastShowHostSchema = new Schema({
  name: { type: String, required: true },
  description: { type: String, required: true },
  gender: { type: String, enum: HumanGender, required: true },
  role: { type: String, required: true },
  // Stored under `speech` so the persisted path matches
  // IBroadcastShowHost.speech; the previous `personality` path name did not
  // correspond to any property of the interface.
  speech: { type: SpeechPersonalitySchema, required: true },
});
/**
 * A producer of a broadcast show, as embedded in IBroadcastShow.producers.
 * Has the same descriptive fields as a host but no speech settings.
 */
export interface IBroadcastShowProducer {
name: string;
description: string;
gender: HumanGender;
role: string;
}
/**
 * Mongoose schema for a show producer embedded in a broadcast show document.
 * Every path is required.
 */
export const BroadcastShowProducerSchema = new Schema({
name: { type: String, required: true },
description: { type: String, required: true },
gender: { type: String, enum: HumanGender, required: true },
role: { type: String, required: true },
});
/**
 * A broadcast show document: its status, descriptive text, the people
 * attached to it, and references to recent episodes.
 */
export interface IBroadcastShow {
_id: Types.ObjectId; // MongoDB concern
__v: number; // MongoDB concern
status: BroadcastShowStatus;
title: string;
description: string;
producers: Array<IBroadcastShowProducer>;
hosts: Array<IBroadcastShowHost>;
// Each entry is either an episode object or just its ObjectId.
recentEpisodes: Array<IEpisode | Types.ObjectId>;
}
/**
 * Mongoose schema for broadcast shows. New shows default to the Offline
 * status and to empty producer, host and recent-episode lists.
 */
export const BroadcastShowSchema = new Schema<IBroadcastShow>({
status: { type: String, enum: BroadcastShowStatus, default: BroadcastShowStatus.Offline, required: true },
title: { type: String, required: true },
description: { type: String, required: true },
producers: { type: [BroadcastShowProducerSchema], default: [ ], required: true },
hosts: { type: [BroadcastShowHostSchema], default: [ ], required: true },
// ObjectIds referencing the "Episode" model.
recentEpisodes: { type: [Types.ObjectId], default: [ ], required: true, ref: 'Episode' },
});
// Model registered under the name "BroadcastShow"; also the default export.
export const BroadcastShow = model<IBroadcastShow>("BroadcastShow", BroadcastShowSchema);
export default BroadcastShow;

34
src/app/models/episode.ts

@ -0,0 +1,34 @@
// app/models/episode.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import { Schema, Types, model } from "mongoose";
import { IBroadcastShow } from "./broadcast-show";
import { IVideo } from "./video";
import { IFeedItem } from "./feed-item";
/**
 * An episode of a broadcast show, optionally linked to a video and to the
 * feed items it covers.
 */
export interface IEpisode {
_id: Types.ObjectId; // MongoDB concern
__v: number; // MongoDB concern
created: Date;
// NOTE(review): typed as a full IBroadcastShow only, while the schema stores
// an ObjectId reference and the other references here are typed as
// `T | Types.ObjectId` — confirm `show` is always populated before use.
show: IBroadcastShow;
title: string;
description: string;
video?: IVideo | Types.ObjectId;
feedItems?: Array<IFeedItem | Types.ObjectId>;
}
/**
 * Mongoose schema for episodes. `created` defaults to the current time and
 * is indexed descending; `show` is indexed ascending.
 */
export const EpisodeSchema = new Schema<IEpisode>({
created: { type: Date, default: Date.now, required: true, index: -1 },
// Reference to the owning "BroadcastShow" document.
show: { type: Schema.ObjectId, required: true, index: 1, ref: "BroadcastShow" },
title: { type: String, required: true },
description: { type: String, required: true },
// Optional reference to a "Video" document.
video: { type: Schema.ObjectId, ref: 'Video' },
// Optional references to "FeedItem" documents.
feedItems: { type: [Schema.ObjectId], ref: 'FeedItem' },
});
// Model registered under the name "Episode"; also the default export.
export const Episode = model<IEpisode>("Episode", EpisodeSchema);
export default Episode;

4
src/app/models/feed-item.ts

@ -4,6 +4,7 @@
import { Schema, Types, model } from "mongoose"; import { Schema, Types, model } from "mongoose";
import { IFeed } from "./feed"; import { IFeed } from "./feed";
import { IVideo } from "./video";
export interface IFeedItem { export interface IFeedItem {
_id: Types.ObjectId; _id: Types.ObjectId;
@ -17,6 +18,8 @@ export interface IFeedItem {
description?: string; description?: string;
body?: string; body?: string;
summary?: string; summary?: string;
videos: Array<IVideo | Types.ObjectId>
} }
export const FeedItemSchema = new Schema<IFeedItem>({ export const FeedItemSchema = new Schema<IFeedItem>({
@ -27,6 +30,7 @@ export const FeedItemSchema = new Schema<IFeedItem>({
description: { type: String }, description: { type: String },
body: { type: String }, body: { type: String },
summary: { type: String }, summary: { type: String },
videos: { type: [Types.ObjectId], default: [ ], ref: "Video" },
}); });
export const FeedItem = model<IFeedItem>("FeedItem", FeedItemSchema); export const FeedItem = model<IFeedItem>("FeedItem", FeedItemSchema);

13
src/app/models/feed.ts

@ -3,20 +3,31 @@
// All Rights Reserved // All Rights Reserved
import { Schema, Types, model } from "mongoose"; import { Schema, Types, model } from "mongoose";
import { IFeedItem } from "./feed-item";
export interface IFeed { export interface IFeed {
_id: Types.ObjectId; _id: Types.ObjectId;
__v: number; __v: number;
created: Date;
updated: Date;
title: string; title: string;
description: string; description: string;
url: string; url: string;
web: string;
latestItem?: IFeedItem | Types.ObjectId;
} }
const FeedSchema = new Schema<IFeed>({ const FeedSchema = new Schema<IFeed>({
created: { type: Date, required: true },
updated: { type: Date },
title: { type: String, required: true }, title: { type: String, required: true },
description: { type: String }, description: { type: String, required: true },
url: { type: String, required: true }, url: { type: String, required: true },
web: { type: String, required: true },
latestItem: { type: Schema.ObjectId, ref: 'FeedItem' },
}); });
export const Feed = model<IFeed>("Feed", FeedSchema); export const Feed = model<IFeed>("Feed", FeedSchema);

29
src/app/models/lib/speech-personality.ts

@ -0,0 +1,29 @@
// app/models/lib/speech-personality.ts
// Copyright (C) 2025 DTP Technologies
// All Rights Reserved
import { Schema } from "mongoose";
/**
 * Voice identifiers that can be stored in a speech personality. The string
 * values are what gets persisted and passed to the speech endpoint.
 */
export enum SpeechVoice {
  Alloy = 'alloy',
  /**
   * @deprecated Misspelling of `Alloy`, kept so existing references keep
   * compiling. It maps to the same 'alloy' value.
   */
  Allow = 'alloy',
  Ash = 'ash',
  Coral = 'coral',
  Echo = 'echo',
  Fable = 'fable',
  Onyx = 'onyx',
  Nova = 'nova',
  Sage = 'sage',
  Shimmer = 'shimmer',
}
/**
 * Speech settings attached to a show host.
 */
export interface ISpeechPersonality {
// Name of the speech model to request.
model: string;
voice: SpeechVoice;
role: string;
}
/**
 * Mongoose schema for ISpeechPersonality. Every path is required and `voice`
 * is restricted to the SpeechVoice values.
 */
export const SpeechPersonalitySchema = new Schema<ISpeechPersonality>({
model: { type: String, required: true },
voice: { type: String, enum: SpeechVoice, required: true },
role: { type: String, required: true },
});

80
src/app/models/video.ts

@ -0,0 +1,80 @@
// app/models/video.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import { Schema, Types, model } from "mongoose";
/**
 * Status values for a video record. The string values are the ones accepted
 * by the `status` path of VideoSchema, which defaults to `Pending`.
 */
export enum VideoStatus {
Pending = "pending",
Processing = "processing",
Live = "live",
Removed = "removed",
}
/**
 * Location and basic properties of a stored media object.
 */
export interface IMediaFile {
// Storage bucket name and object key within that bucket.
bucket: string;
key: string;
// MIME type string of the object.
mime: string;
size: number;
}
/**
 * Mongoose schema for IMediaFile. Every path is required.
 * NOTE(review): unlike the metadata sub-schemas in this file, no
 * `{ _id: false }` option is passed here — confirm that is intended.
 */
export const MediaFileSchema = new Schema<IMediaFile>({
bucket: { type: String, required: true },
key: { type: String, required: true },
mime: { type: String, required: true },
size: { type: Number, required: true },
});
/**
 * Properties of a video's picture stream.
 */
export interface IVideoMetadata {
width: number;
height: number;
fps: number;
bitRate: number;
}
/**
 * Mongoose schema for IVideoMetadata. Every path is required and the
 * sub-document is created without its own `_id`.
 */
export const VideoMetadataSchema = new Schema<IVideoMetadata>({
width: { type: Number, required: true },
height: { type: Number, required: true },
fps: { type: Number, required: true },
bitRate: { type: Number, required: true },
}, { _id: false });
/**
 * Properties of a video's audio stream.
 */
export interface IAudioMetadata {
channelCount: number;
sampleRate: number;
bitRate: number;
}
/**
 * Mongoose schema for IAudioMetadata. Every path is required and the
 * sub-document is created without its own `_id`.
 */
export const AudioMetadataSchema = new Schema<IAudioMetadata>({
channelCount: { type: Number, required: true },
sampleRate: { type: Number, required: true },
bitRate: { type: Number, required: true },
}, { _id: false });
/**
 * A video record: descriptive text, processing status, the optional stored
 * media file, and picture/audio stream metadata.
 */
export interface IVideo {
_id: Types.ObjectId;
__v: number;
created: Date;
status: VideoStatus;
title: string;
description: string;
// Optional: a record may exist without an attached media file.
file?: IMediaFile;
metadata: {
video: IVideoMetadata;
audio: IAudioMetadata;
}
}
/**
 * Mongoose schema for videos. `created` defaults to the current time and is
 * indexed descending; `status` defaults to Pending and is indexed ascending.
 */
export const VideoSchema = new Schema<IVideo>({
created: { type: Date, default: Date.now, required: true, index: -1 },
status: { type: String, enum: VideoStatus, default: VideoStatus.Pending, required: true, index: 1 },
title: { type: String, required: true },
description: { type: String, required: true },
file: { type: MediaFileSchema },
// Both stream metadata sub-documents are required.
metadata: {
video: { type: VideoMetadataSchema, required: true },
audio: { type: AudioMetadataSchema, required: true },
},
});
// Model registered under the name "Video"; also the default export.
export const Video = model<IVideo>("Video", VideoSchema);
export default Video;

177
src/app/services/feed.ts

@ -0,0 +1,177 @@
// app/services/feed.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import { PopulateOptions, Types } from 'mongoose';
import UserAgent from "user-agents";
import FeedItem, { IFeedItem } from '../models/feed-item.js';
import Feed, { IFeed } from '../models/feed.js';
import TextService from './text.js';
import { extractFromXml, FeedData } from "@extractus/feed-extractor";
import {
DtpService,
DtpPlatform,
WebPaginationParameters,
DtpServiceUpdate,
WebError,
} from '../../lib/dtplib.js';
/**
 * Interface to be used when creating and updating RSS feed records.
 * FeedService passes every field through the text service's filter before
 * storing it.
 */
export interface FeedDefinition {
title: string;
description: string;
// Address the RSS content is fetched from (see FeedService.fetchRssFeed).
url: string;
// presumably the address of the feed's website — confirm against callers
web: string;
}
/**
 * Interface to be used when fetching paginated views of FeedItem records.
 */
export interface FeedItemLibrary {
// The requested page of items.
items: Array<IFeedItem>;
// Count of all matching items, not just those on this page.
totalItemCount: number;
}
/**
 * Service that manages RSS feed records and reads their ingested items.
 *
 * Provides create/update/lookup for Feed documents, paginated FeedItem
 * queries, and an HTTP fetch + parse of a feed's RSS XML.
 */
export class FeedService extends DtpService {

  static get name ( ) { return 'FeedService'; }
  static get slug ( ) { return 'feed'; }

  userAgent: UserAgent = new UserAgent();
  populateFeed: Array<PopulateOptions>;
  populateFeedItem: Array<PopulateOptions>;

  constructor (platform: DtpPlatform) {
    super(platform, FeedService);

    // Paths populated whenever a Feed is returned by this service.
    this.populateFeed = [
      {
        path: "latestItem",
      }
    ];

    // Paths populated whenever FeedItems are returned by this service.
    this.populateFeedItem = [
      {
        path: 'feed',
      },
    ];
  }

  /**
   * Create an RSS feed record.
   * @param definition FeedDefinition The options and parameters to be used when
   * creating the feed.
   * @returns An IFeed interface to the newly-created feed.
   */
  async create (definition: FeedDefinition) : Promise<IFeed> {
    const textService = this.getService<TextService>("text");

    const feed = new Feed();
    // The Feed schema requires `created` and declares no default for it, so
    // it has to be set here or save() fails validation.
    feed.created = new Date();
    feed.title = textService.filter(definition.title);
    feed.description = textService.filter(definition.description);
    feed.url = textService.filter(definition.url);
    feed.web = textService.filter(definition.web);

    this.log.info("creating RSS feed", { _id: feed._id, title: feed.title });
    await feed.save();

    return feed.toObject();
  }

  /**
   * Update the descriptive fields of an existing RSS feed.
   * @param feed The feed (or its ObjectId) to update.
   * @param definition FeedDefinition The new values for the feed.
   * @returns The updated feed with `latestItem` populated.
   * @throws WebError (500) when no feed document was returned by the update.
   */
  async update (
    feed: IFeed | Types.ObjectId,
    definition: FeedDefinition,
  ) : Promise<IFeed> {
    const textService = this.getService<TextService>("text");

    // Only $set is used; no empty $unset operator is sent to the database.
    const update: DtpServiceUpdate = { };
    update.$set = { };
    update.$set.updated = new Date();
    update.$set.title = textService.filter(definition.title);
    update.$set.description = textService.filter(definition.description);
    update.$set.url = textService.filter(definition.url);
    update.$set.web = textService.filter(definition.web);

    this.log.info("updating RSS feed", { _id: feed._id, title: definition.title });
    const newFeed = await Feed.findByIdAndUpdate(feed._id, update, {
      new: true,
      populate: this.populateFeed,
    }).lean();
    if (!newFeed) {
      throw new WebError(500, "Failed to update feed");
    }

    return newFeed;
  }

  /**
   * Fetch one feed by id.
   * @param feedId The ObjectId of the feed.
   * @returns The feed with `latestItem` populated, or null when not found.
   */
  async getById (feedId: Types.ObjectId) : Promise<IFeed | null> {
    const feed = await Feed
      .findById(feedId)
      .populate(this.populateFeed)
      .lean();
    return feed;
  }

  /**
   * Fetch one page of items belonging to a feed, newest first.
   * @param feed The feed (or its ObjectId) whose items are wanted.
   * @param pagination Skip/count parameters for the page.
   * @returns The page of items and the total number of items in the feed.
   */
  async getItemsForFeed (
    feed: IFeed | Types.ObjectId,
    pagination: WebPaginationParameters,
  ) : Promise<FeedItemLibrary> {
    const search = { feed: feed._id };
    const items = await FeedItem
      .find(search)
      .sort({ created: -1 })
      .skip(pagination.skip)
      .limit(pagination.cpp)
      .populate(this.populateFeedItem)
      .lean();
    const totalItemCount = await FeedItem.countDocuments(search);
    return { items, totalItemCount };
  }

  /**
   * Fetch one page of items across all feeds, newest first.
   * @param pagination Skip/count parameters for the page.
   * @returns The page of items and an estimated count of all feed items.
   */
  async getUnifiedFeed (
    pagination: WebPaginationParameters,
  ) : Promise<FeedItemLibrary> {
    const items = await FeedItem
      .find()
      .sort({ created: -1 })
      .skip(pagination.skip)
      .limit(pagination.cpp)
      .populate(this.populateFeedItem)
      .lean();
    const totalItemCount = await FeedItem.estimatedDocumentCount();
    return { items, totalItemCount };
  }

  /**
   * Download a feed's RSS document and parse it.
   * @param feed The feed whose `url` is requested.
   * @returns The parsed feed data.
   * @throws Error when the HTTP response status is not OK.
   */
  async fetchRssFeed (feed: IFeed) : Promise<FeedData> {
    const userAgent = this.userAgent.toString();
    const headers = {
      "User-Agent":
        userAgent ||
        `DtpNewsroom/1.0 (https://digitaltelepresence.com/)`,
      Accept:
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
      "Accept-Encoding": "gzip, deflate, br",
      "Accept-Language": "en-US,en;q=0.9",
      // The HTTP request header is spelled "Referer" (single "r").
      Referer: feed.url,
    };

    this.log.debug("fetching RSS feed content", { userAgent, headers });
    const response = await fetch(feed.url, { method: "GET", headers });
    if (!response.ok) {
      throw new Error(`Failed to fetch RSS feed: ${response.statusText} (${response.status})`);
    }

    const xml = await response.text();
    return extractFromXml(xml);
  }
}
export default FeedService;

175
src/app/services/openai.ts

@ -0,0 +1,175 @@
// app/services/openai.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import env from "../../config/env.js";
import assert from "node:assert";
import path from "node:path";
import fs from "node:fs";
import OpenAI from "openai";
import { IFeedItem } from "../models/feed-item.js";
import { DtpPlatform, DtpService, WebError } from "../../lib/dtplib.js";
import { IEpisode } from "app/models/episode.js";
import { SpeechVoice } from "app/models/lib/speech-personality.js";
import { Types } from "mongoose";
// import TextService from './text.js';
/**
 * Describes a file produced by OpenAiService.generateSpeech.
 */
export interface IGeneratedFile {
// Identifier generated for the file; its string form is also the file name.
_id: Types.ObjectId;
// Path of the file on disk.
file: string;
}
/**
 * Service wrapping the two OpenAI-compatible endpoints used by the
 * application: the Gab endpoint for chat completions (summaries, episode
 * titles and descriptions) and the homelab endpoint for speech generation.
 */
export class OpenAiService extends DtpService {

  static get name ( ) { return 'OpenAiService'; }
  static get slug ( ) { return 'openAi'; }

  gabClient: OpenAI;
  homelabClient: OpenAI;

  constructor (platform: DtpPlatform) {
    super(platform, OpenAiService);

    this.gabClient = new OpenAI({
      baseURL: env.apis.openai.gab.baseURL,
      apiKey: env.apis.openai.gab.apiKey,
    });

    // NOTE(review): no apiKey is passed for the homelab client — confirm the
    // SDK can be constructed without one in the deployment environment.
    this.homelabClient = new OpenAI({
      baseURL: env.apis.openai.homelab.baseURL,
    });
  }

  /**
   * Uses the Gab AI service to summarize an RSS feed item.
   * @param feedItem FeedItem The item to be summarized.
   * @returns A Promise that resolves to the feed item's text summary, or
   * undefined when the service returned no usable content.
   */
  async summarizeFeedItem (feedItem: IFeedItem) : Promise<string | undefined> {
    return this.requestChatCompletion(
      "You are a professional journalist with a degree in Journalism. Stick to the facts and report them accurately and impartially.",
      `Write a summary of a news article with the title "${feedItem.title}"\n\n${feedItem.body}`,
    );
  }

  /**
   * Asks the Gab AI service for a title covering the episode's feed items.
   * @param episode The episode; `feedItems` must be a non-empty, populated array.
   * @returns The generated title, or null when no usable content was returned.
   */
  async createEpisodeTitle (episode: IEpisode) : Promise<string | null> {
    const titles = this.quoteFeedItemTitles(episode);
    const title = await this.requestChatCompletion(
      "You are an executive television network producer. You know what topics sell, and you know what works on YouTube and social media. You take your job seriously, you don't use vulgarity, and you always remember what's important to regular people.",
      `Create a title for an episode of a news broadcast that will present the following topics: ${titles.join(", ")}. Only tell me the title. Don't say anything else.`,
    );
    return title ?? null;
  }

  /**
   * Asks the Gab AI service for a description covering the episode's feed items.
   * @param episode The episode; `feedItems` must be a non-empty, populated array.
   * @returns The generated description, or null when no usable content was returned.
   */
  async createEpisodeDescription (episode: IEpisode) : Promise<string | null> {
    const titles = this.quoteFeedItemTitles(episode);
    const description = await this.requestChatCompletion(
      "You are an executive television network producer. You know what topics sell, and you know what works on YouTube and social media. You take your job seriously, you don't use vulgarity, and you always remember what's important to regular people.",
      // The request now asks for a description; it previously asked for a title.
      `Write a description for an episode of a news broadcast that will present the following topics: ${titles.join(", ")}. Only say the description of the episode. Don't say anything else at all.`,
    );
    return description ?? null;
  }

  /**
   * Requests speech audio from the homelab endpoint and writes it to a file
   * under `env.root` named after a newly generated ObjectId.
   * @param input The text to be spoken.
   * @param model The speech model to request.
   * @param voice The voice to request.
   * @returns The generated id and the path of the written file.
   * @throws WebError when the response is not OK.
   */
  async generateSpeech (input: string, model: string, voice: SpeechVoice) : Promise<IGeneratedFile> {
    const audioId = new Types.ObjectId();
    const audioFile = path.join(env.root, audioId.toString());

    const response = await this.homelabClient.audio.speech.create({
      input,
      model,
      voice
    });
    if (!response.ok) {
      throw new WebError(response.status, `failed to generate speech audio: ${response.statusText}`);
    }
    assert(response.body, "A response body is required");

    // Remove any previous file at the target path; `force` ignores a missing file.
    await fs.promises.rm(audioFile, { force: true });

    this.log.info("receiving audio to file:", { audioFile });
    // eslint-disable-next-line
    await this.streamResponseToFile(response.body as any, audioFile);

    return { _id: audioId, file: audioFile };
  }

  /**
   * Pipes a readable stream into a file.
   * @param stream The source stream.
   * @param path The destination file path.
   * @returns A Promise that resolves when the file has been fully written and
   * rejects when either the source or the file stream reports an error.
   */
  async streamResponseToFile (stream: NodeJS.ReadableStream, path: string) : Promise<void> {
    return new Promise((resolve, reject) => {
      const writeStream = fs.createWriteStream(path);
      const fail = (error: Error) => {
        writeStream.destroy();
        reject(error);
      };

      // pipe() does not forward source errors to the destination, so both
      // ends need their own listener or the Promise could stay pending.
      stream.on('error', fail);
      writeStream.on('error', fail);
      writeStream.on('finish', () => resolve());

      stream.pipe(writeStream);
    });
  }

  /**
   * Sends a system + user message pair to the Gab chat endpoint and returns
   * the content of the first choice, or undefined when there is none.
   */
  private async requestChatCompletion (system: string, user: string) : Promise<string | undefined> {
    const response = await this.gabClient.chat.completions.create({
      model: "arya",
      messages: [
        {
          role: "system",
          content: system,
        },
        {
          role: "user",
          content: user,
        },
      ]
    });
    if (!Array.isArray(response.choices) || (response.choices.length === 0)) {
      return undefined;
    }

    //TODO: Be more selective here
    const choice = response.choices[0];
    if (!choice || !choice.message.content) {
      return undefined;
    }

    return choice.message.content;
  }

  /**
   * Returns the episode's feed item titles, each wrapped in double quotes
   * with every embedded double quote escaped.
   */
  private quoteFeedItemTitles (episode: IEpisode) : string[] {
    assert(Array.isArray(episode.feedItems) && (episode.feedItems.length > 0), "Feed items are required");
    return episode.feedItems.map((item) => {
      const title = (item as IFeedItem).title;
      // An unpopulated reference (bare ObjectId) carries no title.
      assert(typeof title === "string", "Feed items must be populated");
      return `"${title.replace(/"/g, '\\"')}"`;
    });
  }
}
export default OpenAiService;

160
src/app/services/video.ts

@ -0,0 +1,160 @@
// app/services/video.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import env from "../../config/env.js";
import { Types } from 'mongoose';
import Video, { IVideo, VideoStatus } from '../models/video.js';
import TextService from './text.js';
import {
DtpService,
DtpPlatform,
WebPaginationParameters,
DtpServiceUpdate,
WebError,
} from '../../lib/dtplib.js';
import Bull from 'bull';
import JobQueueService from './job-queue.js';
import MinioService from "./minio.js";
/**
 * Interface to be used when creating and updating videos.
 * VideoService passes both fields through the text service's filter before
 * storing them.
 */
export interface VideoDefinition {
title: string;
description: string;
}
/**
 * Interface to be used when fetching paginated views of videos.
 */
export interface VideoLibrary {
// The requested page of videos.
videos: Array<IVideo>;
// Count of all videos, not just those on this page.
totalVideoCount: number;
}
/**
 * Service that creates, updates, lists and removes video records, and moves
 * their media files in and out of object storage.
 */
export class VideoService extends DtpService {

  static get name ( ) { return 'VideoService'; }
  static get slug ( ) { return 'video'; }

  jobQueue?: Bull.Queue;

  constructor (platform: DtpPlatform) {
    super(platform, VideoService);
  }

  /**
   * Acquires the "video" job queue from the job queue service.
   */
  async start ( ) : Promise<void> {
    const queues = this.getService<JobQueueService>("jobQueue");
    this.jobQueue = queues.getJobQueue("video", env.jobQueues.video);
  }

  /**
   * Uploads a media file to storage and saves a new Pending video record
   * that points at it.
   * @param definition Title and description for the video.
   * @param file The uploaded file to store.
   * @returns The saved video as a plain object.
   */
  async create (definition: VideoDefinition, file: Express.Multer.File) : Promise<IVideo> {
    const text = this.getService<TextService>("text");
    const storage = this.getService<MinioService>("minio");

    const record = new Video();
    record.created = new Date();
    record.status = VideoStatus.Pending;
    record.title = text.filter(definition.title);
    record.description = text.filter(definition.description);

    // Fixed stream metadata assigned to every new record.
    record.metadata = {
      video: { width: 1920, height: 1080, fps: 30, bitRate: 4 * 1024 * 1000 },
      audio: { channelCount: 2, sampleRate: 48000, bitRate: 160000 },
    };

    // The object key is the record's own id.
    record.file = {
      bucket: env.minio.buckets.videos,
      key: record._id.toString(),
      mime: file.mimetype,
      size: file.size,
    };

    const uploaded = await storage.uploadFile({
      bucket: record.file.bucket,
      key: record.file.key,
      filePath: file.path,
      metadata: {
        'Content-Type': file.mimetype,
        'Content-Length': file.size.toString(),
      },
    });
    this.log.info("video file uploaded to storage", { etag: uploaded.etag });

    this.log.info("creating video record", { videoId: record._id, title: record.title });
    await record.save();

    return record.toObject();
  }

  /**
   * Replaces a video's title and description.
   * @param video The video (or its ObjectId) to update.
   * @param definition The new title and description.
   * @returns The updated video.
   * @throws WebError (500) when no video document was returned by the update.
   */
  async update (video: IVideo | Types.ObjectId, definition: VideoDefinition) : Promise<IVideo> {
    const text = this.getService<TextService>("text");

    const changes: DtpServiceUpdate = { };
    changes.$set = { };
    changes.$set.title = text.filter(definition.title);
    changes.$set.description = text.filter(definition.description);

    this.log.info("updating video", { videoId: video._id });
    const updated = await Video
      .findByIdAndUpdate(video._id, changes, { new: true })
      .lean();
    if (updated) {
      return updated;
    }

    throw new WebError(500, "Failed to update video");
  }

  /**
   * Sets the status of a video.
   * @param video The video (or its ObjectId) to change.
   * @param status The new status.
   */
  async setStatus (video: IVideo | Types.ObjectId, status: VideoStatus) : Promise<void> {
    this.log.info("setting video status", { videoId: video._id, status });
    const change = { $set: { status } };
    await Video.findByIdAndUpdate(video._id, change);
  }

  /**
   * Fetches one video by id.
   * @param videoId The ObjectId of the video.
   * @returns The video, or null when not found.
   */
  async getById (videoId: Types.ObjectId) : Promise<IVideo | null> {
    return await Video.findById(videoId).lean();
  }

  /**
   * Fetches one page of videos, newest first.
   * @param pagination Skip/count parameters for the page.
   * @returns The page of videos and an estimated count of all videos.
   */
  async getAll (pagination: WebPaginationParameters) : Promise<VideoLibrary> {
    const { skip, cpp } = pagination;
    const videos = await Video
      .find({ })
      .sort({ created: -1 })
      .skip(skip)
      .limit(cpp)
      .lean();
    const totalVideoCount = await Video.estimatedDocumentCount();
    return { videos, totalVideoCount };
  }

  /**
   * Removes a video's stored file (when it has one) and then its record.
   * @param video The video to remove.
   */
  async remove (video: IVideo) : Promise<void> {
    const stored = video.file;
    if (stored) {
      const storage = this.getService<MinioService>("minio");
      this.log.info("removing video file", {
        _id: video._id,
        title: video.title,
        bucket: stored.bucket,
        key: stored.key,
      });
      await storage.removeObject(stored.bucket, stored.key);
    }

    this.log.info("removing video record", { videoId: video._id, title: video.title });
    await Video.deleteOne({ _id: video._id });
  }
}
export default VideoService;

20
src/config/env.ts

@ -56,6 +56,7 @@ export default {
buckets: { buckets: {
images: process.env.DTP_MINIO_IMAGE_BUCKET || "dtp-images", images: process.env.DTP_MINIO_IMAGE_BUCKET || "dtp-images",
videos: process.env.DTP_MINIO_VIDEO_BUCKET || "dtp-videos", videos: process.env.DTP_MINIO_VIDEO_BUCKET || "dtp-videos",
audios: process.env.DTP_MINIO_AUDIO_BUCKET || "dtp-audios",
attachments: process.env.DTP_MINIO_ATTACHMENT_BUCKET || "dtp-attachments", attachments: process.env.DTP_MINIO_ATTACHMENT_BUCKET || "dtp-attachments",
}, },
}, },
@ -80,9 +81,15 @@ export default {
secret: process.env.DTP_GOOGLE_SECRET, secret: process.env.DTP_GOOGLE_SECRET,
}, },
openai: { openai: {
enabled: process.env.DTP_OPENAI === "enabled", gab: {
baseURL: process.env.DTP_OPENAI_BASEURL, enabled: process.env.DTP_OPENAI_GAB === "enabled",
apiKey: process.env.DTP_OPENAI_KEY, baseURL: process.env.DTP_OPENAI_GAB_BASEURL,
apiKey: process.env.DTP_OPENAI_GAB_KEY,
},
homelab: {
enabled: process.env.DTP_OPENAI_HOMELAB === "enabled",
baseURL: process.env.DTP_OPENAI_HOMELAB_BASEURL,
},
}, },
}, },
email: { email: {
@ -116,6 +123,13 @@ export default {
removeOnComplete: true, removeOnComplete: true,
removeOnFail: false, removeOnFail: false,
}, },
video: {
priority: 10,
delay: 0,
attempts: 3,
removeOnComplete: true,
removeOnFail: false,
},
}, },
log: { log: {
http: { http: {

72
src/speechgen.ts

@ -0,0 +1,72 @@
// speechgen.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
import assert from "node:assert";
import fs from "node:fs";
import { DtpProcess } from "./lib/dtplib.js";
import { SpeechVoice } from "./app/models/lib/speech-personality.js";
import OpenAiService, { IGeneratedFile } from "app/services/openai.js";
/**
 * Command line process that turns text into a speech audio file through the
 * OpenAI service.
 */
class SpeechGenerator extends DtpProcess {

  static get name ( ) { return "SpeechGenerator"; }
  static get slug ( ) { return "speechgen"; }

  constructor ( ) {
    super(SpeechGenerator);
  }

  /**
   * Generates a speech audio file for the given text.
   * @param model The speech model to request.
   * @param voice The voice to request.
   * @param input The text to be spoken.
   * @returns The id and path of the generated file.
   * @throws Error wrapping the underlying failure in its `cause`.
   */
  async generate (model: string, voice: SpeechVoice, input: string) : Promise<IGeneratedFile> {
    try {
      console.log("requesting audio resource");
      const openAiService = this.getService<OpenAiService>("openAi");
      // `return await` keeps the call inside this try block, so a rejected
      // Promise is caught and wrapped below instead of bypassing the catch.
      return await openAiService.generateSpeech(input, model, voice);
    } catch (error) {
      throw new Error("failed to generate speech audio file", { cause: error });
    }
  }

  /**
   * Logs that this process has no statistics to report.
   */
  async reportStats ( ) : Promise<void> {
    this.log.info("this process does not report statistics");
  }
}
/*
 * Entry point. Usage: speechgen <model> <voice> <input-file>
 * Reads the input file as UTF-8 text and generates speech audio from it.
 */
(async ( ) => {
  try {
    console.log("Speech Generator: A command line tool to generate audio");
    const generator = new SpeechGenerator();

    const model = process.argv[2];
    assert(model, "Must specify model (tts-1, tts-1-hd)");

    // The voice is passed through unchecked; it is not validated against the
    // SpeechVoice values here.
    const voice = process.argv[3] as SpeechVoice;
    assert(voice, "Must specify voice");

    const inputFile = process.argv[4];
    assert(inputFile, "Must specify input filename");

    const inputText = await fs.promises.readFile(inputFile, "utf-8");

    console.log("generating audio");
    const response = await generator.generate(model, voice, inputText);
    generator.log.info("speech audio file generated", response);
  } catch (error) {
    console.error("speechgen has failed", error);
    // Report the failure to the calling shell instead of exiting with 0.
    process.exitCode = 1;
  }
})();
/*
* NOTEPAD
*
* import env from "./config/env.js";
* import MinioService from "app/services/minio.js";
*
* const minioService = this.getService<MinioService>("minio");
* const bucket = env.minio.buckets.audios;
* const key = `${response._id.toString()}.wav`;
* const info = await minioService.uploadFile({ bucket, key, filePath: response.file });
*/

139
src/workers/newsroom/fetch-news.ts

@ -5,20 +5,23 @@
import env from "../../config/env.js"; import env from "../../config/env.js";
import assert from "node:assert"; import assert from "node:assert";
import path from "node:path";
import fs from "node:fs";
import Bull from "bull"; import Bull from "bull";
import OpenAI from "openai";
import { JSDOM } from "jsdom"; import { JSDOM } from "jsdom";
import { DtpBase } from "../../lib/dtplib.js"; import { DtpBase } from "../../lib/dtplib.js";
import { DtpWorker } from "../../lib/core/worker.js"; import { DtpWorker } from "../../lib/core/worker.js";
import { extractFromXml, FeedData, FeedEntry } from "@extractus/feed-extractor";
import UserAgent from "user-agents"; import UserAgent from "user-agents";
import FeedItem, { IFeedItem } from "../../app/models/feed-item.js"; import FeedItem, { IFeedItem } from "../../app/models/feed-item.js";
import Feed, { IFeed } from "../../app/models/feed.js"; import Feed, { IFeed } from "../../app/models/feed.js";
import { FeedData, FeedEntry } from "@extractus/feed-extractor";
import OpenAiService, { IGeneratedFile } from "app/services/openai.js";
import { IBroadcastShowHost } from "app/models/broadcast-show.js";
import FeedService from "app/services/feed.js";
import { HumanGender } from "app/models/lib/human-gender.js";
import { SpeechVoice } from "app/models/lib/speech-personality.js";
export class FetchNewsJob extends DtpBase { export class FetchNewsJob extends DtpBase {
@ -28,17 +31,14 @@ export class FetchNewsJob extends DtpBase {
worker: DtpWorker; worker: DtpWorker;
jobQueue: Bull.Queue; jobQueue: Bull.Queue;
aiClient: OpenAI;
userAgent: UserAgent = new UserAgent(); userAgent: UserAgent = new UserAgent();
aiService: OpenAiService;
constructor (worker: DtpWorker, jobQueue: Bull.Queue) { constructor (worker: DtpWorker, jobQueue: Bull.Queue) {
super(worker, FetchNewsJob); super(worker, FetchNewsJob);
this.worker = worker; this.worker = worker;
this.jobQueue = jobQueue; this.jobQueue = jobQueue;
this.aiClient = new OpenAI({ this.aiService = this.getService<OpenAiService>("openAi");
baseURL: env.apis.openai.baseURL,
apiKey: env.apis.openai.apiKey,
});
} }
async run ( ) : Promise<void> { async run ( ) : Promise<void> {
@ -50,8 +50,10 @@ export class FetchNewsJob extends DtpBase {
} }
async ingestFeed (feed: IFeed) : Promise<void> { async ingestFeed (feed: IFeed) : Promise<void> {
const feedService = this.getService<FeedService>("feed");
this.log.info("running news fetch job", { title: feed.title }); this.log.info("running news fetch job", { title: feed.title });
const rss: FeedData = await this.loadRssFeed(feed.url); const rss: FeedData = await feedService.fetchRssFeed(feed);
this.log.debug('feed loaded', { this.log.debug('feed loaded', {
feed: { feed: {
@ -63,10 +65,10 @@ export class FetchNewsJob extends DtpBase {
return; return;
} }
/* if (env.NODE_ENV !== "production") {
* Only process 3 entries in dev to prevent burning the AI API quotas.
*/
rss.entries = rss.entries.slice(0, 1); rss.entries = rss.entries.slice(0, 1);
}
for (const entry of rss.entries) { for (const entry of rss.entries) {
this.log.info("scheduling ingest for entry", { this.log.info("scheduling ingest for entry", {
title: entry.title, title: entry.title,
@ -86,7 +88,7 @@ export class FetchNewsJob extends DtpBase {
const entry: FeedEntry = job.data.entry as FeedEntry; const entry: FeedEntry = job.data.entry as FeedEntry;
this.log.info("ingesting news article", { jobId: job.id, title: entry.title }); this.log.info("ingesting news article", { jobId: job.id, title: entry.title });
const rssEntry = await FeedItem.findOneAndUpdate( const feedItem = await FeedItem.findOneAndUpdate(
{ link: entry.link }, { link: entry.link },
{ {
$setOnInsert: { $setOnInsert: {
@ -102,8 +104,23 @@ export class FetchNewsJob extends DtpBase {
{ upsert: true, new: true }, { upsert: true, new: true },
); );
await this.fetchArticleBody(rssEntry); if (!feedItem.isNew) {
// await this.generateAiSummary(rssEntry); return;
}
await this.fetchArticleBody(feedItem);
await this.generateItemSummary(feedItem);
await this.generateItemNarration(feedItem, {
name: "Test Host",
description: "A show host used for testing speech generation.",
gender: HumanGender.Woman,
role: "You are a female anchor of a television news broadcast.",
speech: {
model: "tts-1-hd",
voice: SpeechVoice.Shimmer,
role: "You are a female anchor of a television news broadcast.",
}
});
} }
async fetchArticleBody (rssEntry: IFeedItem) : Promise<void> { async fetchArticleBody (rssEntry: IFeedItem) : Promise<void> {
@ -143,84 +160,22 @@ export class FetchNewsJob extends DtpBase {
); );
} }
async generateAiSummary (rssEntry: IFeedItem) : Promise<void> { async generateItemSummary (feedItem: IFeedItem) : Promise<void> {
const response = await this.aiClient.chat.completions.create({ const openAiService = this.getService<OpenAiService>("openAi");
model: "arya", feedItem.summary = await openAiService.summarizeFeedItem(feedItem);
messages: [
{
role: "system",
content: "You are a news editor with a casual tone."
},
{
role: "user",
content: `Write a summary of a news article with the title "${rssEntry.title}"\n\n${rssEntry.body}`,
},
]
});
if (!Array.isArray(response.choices) || (response.choices.length === 0)) {
return;
}
const choice = await response.choices[0];
if (!choice || !choice.message.content) {
return;
}
rssEntry.summary = choice.message.content;
await FeedItem.updateOne( await FeedItem.updateOne(
{ _id: rssEntry._id }, { _id: feedItem._id },
{ $set: { summary: rssEntry.summary } }, { $set: { summary: feedItem.summary } },
); );
this.log.debug('article summarized', { title: rssEntry.title, summary: rssEntry.summary }); this.log.debug('article summarized', { title: feedItem.title, summary: feedItem.summary });
}
async generateAiNarration (rssEntry: IFeedItem) : Promise<IFeedItem> {
assert(rssEntry.description, "Feed entry description is required");
const response = await this.aiClient.audio.speech.create({
input: rssEntry.description,
model: "tts-1",
voice: "ash",
response_format: "wav",
});
const contentType = response.headers.get("content-type");
if (!contentType) {
this.log.error("AI response doesn't specify content type");
return rssEntry;
}
if (!contentType.includes("wav")) {
this.log.error("AI response isn't wave audio");
return rssEntry;
}
const data = await response.bytes();
await fs.promises.writeFile(path.join(env.root, `${rssEntry._id}.wav`), data, "binary");
return rssEntry;
}
async loadRssFeed (url: string) : Promise<FeedData> {
const userAgent = this.userAgent.toString();
const headers = {
"User-Agent":
userAgent ||
`DtpNewsroom/1.0 (https://digitaltelepresence.com/)`,
Accept:
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"Accept-Encoding": "gzip, deflate, br",
"Accept-Language": "en-US,en;q=0.9",
Referrer: url,
};
this.log.debug("fetching RSS feed content", { userAgent, headers });
const response = await fetch(url, { method: "GET", headers });
if (!response.ok) {
throw new Error(`Failed to fetch RSS feed: ${response.statusText} (${response.status})`);
} }
const xml = await response.text(); async generateItemNarration (
return extractFromXml(xml); feedItem: IFeedItem,
host: IBroadcastShowHost,
) : Promise<IGeneratedFile> {
const openAiService = this.getService<OpenAiService>("openAi");
assert(feedItem.summary, "Feed item summary is required");
return openAiService.generateSpeech(feedItem.summary, host.speech.model, host.speech.voice);
} }
} }

7
types/openai.d.ts

@ -0,0 +1,7 @@
// types/openai.d.ts
// Copyright (C) 2025 DTP Technologies, LLC
// All Rights Reserved
/**
 * Declares the `voice` field of speech creation parameters as one of two
 * custom voice names.
 * NOTE(review): this is a standalone exported interface rather than a
 * `declare module` augmentation — confirm it actually affects the type the
 * OpenAI client uses for its speech parameters.
 */
export interface SpeechCreateParams {
voice: 'robc' | 'acosta';
}
Loading…
Cancel
Save