import { z } from 'zod';

/**
 * Schema validating the options for an embeddings generation run.
 *
 * Required fields:
 * - `chatId`: string in UUID format.
 * - `version`: number greater than or equal to 0.
 *
 * Optional fields (range-checked only when present):
 * - `batchSize`: number >= 1.
 * - `chunkSize`: number >= 100.
 * - `chunkOverlap`: number >= 10.
 * - `transformer`: currently only `'jamu'` is accepted.
 * - `chunkTransformer`: currently only `'contextualized'` is accepted.
 * - `chunkTransformerOptions`: object with an optional `language` string;
 *   may be omitted from the input, in which case it parses to `{}`.
 */
export const generateEmbeddingsSchema = z.object({
  chatId: z.string().uuid(),
  version: z.number().nonnegative(),
  batchSize: z.number().min(1).optional(),
  chunkSize: z.number().min(100).optional(),
  chunkOverlap: z.number().min(10).optional(),
  transformer: z.enum(['jamu']).optional(),
  chunkTransformer: z.enum(['contextualized']).optional(),
  // Every option inside this object is optional and the transformer it
  // configures is optional too, so requiring callers to send an empty `{}`
  // serves no purpose. `.default({})` accepts an omitted value while keeping
  // the parsed output type identical (the property is always an object).
  chunkTransformerOptions: z
    .object({
      language: z.string().optional(),
    })
    .default({}),
});

/** Parsed (output) type produced by `generateEmbeddingsSchema`. */
export type GenerateEmbeddings = z.output<typeof generateEmbeddingsSchema>;

// Field validators for a storage-backed source. Both values are only checked
// to be strings; no further format validation is applied here.
const storageSourceDataShape = {
  bucketName: z.string(),
  // NOTE(review): presumably the object-key prefix inside the bucket — confirm.
  prefix: z.string(),
};

/** Source descriptor identifying data by `bucketName` and `prefix`. */
const storageSourceDataSchema = z.object(storageSourceDataShape);

/**
 * Source descriptor carrying a list of `urls`.
 *
 * Entries are only validated as strings; no URL-format check is performed,
 * and an empty array is accepted.
 */
const scrapeSourceDataSchema = z.object({
  urls: z.string().array(),
});

// Value substituted for `limit` when the input omits it.
const DEFAULT_CRAWL_LIMIT = 10;

/**
 * Source descriptor carrying a single starting `url` and a `limit`.
 *
 * `url` is only validated as a string (no URL-format check). `limit` must be
 * a number strictly greater than 0 and falls back to 10 when not provided.
 */
const crawlSourceDataSchema = z.object({
  url: z.string(),
  limit: z.number().positive().default(DEFAULT_CRAWL_LIMIT),
});

/** Parsed (output) type of `storageSourceDataSchema`. */
export type StorageSourceData = z.output<typeof storageSourceDataSchema>;

/** Parsed (output) type of `scrapeSourceDataSchema`. */
export type ScrapeSourceData = z.output<typeof scrapeSourceDataSchema>;

/** Parsed (output) type of `crawlSourceDataSchema`; `limit` is always set after parsing. */
export type CrawlSourceData = z.output<typeof crawlSourceDataSchema>;

// Validators shared by every variant of the create-embeddings input.
const createEmbeddingsCommonShape = {
  chatId: z.string().uuid(),
  batchSize: z.number().optional(),
};

/**
 * Schema validating the input for creating embeddings from one of three
 * kinds of source.
 *
 * `mode` is the discriminator and determines which source fields apply:
 * - `'storage'`: `bucketName`, `prefix`
 * - `'scrape'`: `urls`
 * - `'crawl'`: `url`, plus `limit` (defaults to 10 when omitted)
 *
 * All variants also require `chatId` (UUID string) and accept an optional
 * numeric `batchSize`.
 *
 * Each variant is tagged with its own `mode` literal so that the source
 * fields are validated against the declared mode. Intersecting a free `mode`
 * enum with an untagged union would accept mismatched payloads (for example
 * `mode: 'crawl'` with only storage fields) and could silently strip the
 * fields belonging to the declared mode.
 */
export const createEmbeddingsSchema = z.discriminatedUnion('mode', [
  z.object({
    mode: z.literal('storage'),
    ...createEmbeddingsCommonShape,
    ...storageSourceDataSchema.shape,
  }),
  z.object({
    mode: z.literal('scrape'),
    ...createEmbeddingsCommonShape,
    ...scrapeSourceDataSchema.shape,
  }),
  z.object({
    mode: z.literal('crawl'),
    ...createEmbeddingsCommonShape,
    ...crawlSourceDataSchema.shape,
  }),
]);

/** Parsed (output) type produced by `createEmbeddingsSchema`. */
export type CreateEmbeddings = z.output<typeof createEmbeddingsSchema>;
