import FieldModel from '@/models/FieldModel';

/**
 * Field descriptors for the Spark configuration entries listed below.
 *
 * Each entry carries the key of the value (`fieldName`), the Spark property
 * name shown as `label`, the field `type`, and two help strings:
 * `tooltipText` and `valueTooltipText`.
 * NOTE(review): presumably `tooltipText` explains the property and
 * `valueTooltipText` explains how the value is derived — confirm against
 * the component that renders these fields.
 */
export const sparkConfigList: FieldModel[] = [
    {
        fieldName: "defaultParallelism",
        label: "spark.default.parallelism",
        type: "plain-text",
        tooltipText:
            "Default number of partitions in RDDs. We recommend that you estimate the size of each partition and adjust this number accordingly using coalesce or repartition.",
        valueTooltipText:
            "Total number of cores on all executor nodes times parallelism per core or 2, whichever is larger",
    },
    {
        fieldName: "executorMemory",
        label: "spark.executor.memory (GB)",
        type: "plain-text",
        tooltipText: "Amount of memory to use per executor process.",
        valueTooltipText: "Node memory without the overhead memory.",
    },
    {
        fieldName: "executorInstances",
        label: "spark.executor.instances",
        type: "plain-text",
        tooltipText: "Final number of executor instances.",
        // The YARN component that takes this slot is the ApplicationMaster.
        valueTooltipText: "Leaving 1 executor for ApplicationMaster.",
    },
    {
        fieldName: "driverCores",
        label: "spark.driver.cores",
        type: "plain-text",
        // Previously duplicated the driver *memory* description.
        tooltipText: "Number of cores to use for the driver process.",
        valueTooltipText:
            "We recommend setting this to spark.executor.cores.",
    },
    {
        // The only entry in this list that uses the "number-input" type.
        fieldName: "executorCores",
        label: "spark.executor.cores",
        type: "number-input",
        tooltipText: "The number of cores to use on each executor.",
        valueTooltipText:
            "Assigning executors with a large number of virtual cores leads to a low number of executors and reduced parallelism. Assigning a low number of virtual cores leads to a high number of executors, causing a larger amount of I/O operations. We suggest that you have 5 cores for each executor to achieve optimal results in any sized cluster.",
    },
    {
        fieldName: "driverMemory",
        label: "spark.driver.memory (GB)",
        type: "plain-text",
        tooltipText: "Amount of memory to use for the driver process.",
        valueTooltipText:
            "We recommend setting this to spark.executor.memory.",
    },
    {
        fieldName: "driverMaxResultSize",
        label: "spark.driver.maxResultSize (GB)",
        type: "plain-text",
        tooltipText:
            "Limit of total size of serialized results of all partitions for each Spark action (e.g. collect).",
        valueTooltipText:
            "Should be at least 1M, or 0 for unlimited. Jobs will be aborted if the total size is above this limit. Having a high limit may cause out-of-memory errors in driver (depends on spark.driver.memory and memory overhead of objects in JVM). Setting a proper limit can protect the driver from out-of-memory errors.",
    },
    {
        fieldName: "driverMemoryOverhead",
        label: "spark.driver.memoryOverhead (MB)",
        type: "plain-text",
        tooltipText:
            "Amount of non-heap memory to be allocated per driver process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size (typically 6-10%). This option is currently supported on YARN, Mesos and Kubernetes.",
        valueTooltipText: "spark.driver.memory * 0.10, with minimum of 384",
    },
    {
        fieldName: "executorMemoryOverhead",
        label: "spark.executor.memoryOverhead (MB)",
        type: "plain-text",
        tooltipText:
            "Amount of additional memory to be allocated per executor process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size (typically 6-10%). This option is currently supported on YARN and Kubernetes.",
        // NOTE(review): this repeats tooltipText verbatim, unlike the driver
        // counterpart which states a formula — confirm the intended text.
        valueTooltipText:
            "Amount of additional memory to be allocated per executor process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor size (typically 6-10%). This option is currently supported on YARN and Kubernetes.",
    },
    {
        fieldName: "dynamicAllocation",
        label: "spark.dynamicAllocation.enabled",
        type: "plain-text",
        tooltipText:
            "Spark on YARN can dynamically scale the number of executors used for a Spark application based on the workloads. This is the configuration responsible for it.",
        valueTooltipText:
            "Set spark.dynamicAllocation.enabled to true only if the numbers are properly determined for spark.dynamicAllocation.initialExecutors, minExecutors, maxExecutors. Otherwise we recommend to manually calculate the resources for the important jobs.",
    },
    {
        fieldName: "adaptiveQuery",
        label: "spark.sql.adaptive.enabled",
        type: "plain-text",
        tooltipText:
            "Adaptive Query Execution is an optimization technique in Spark SQL that makes use of the runtime statistics to choose the most efficient query execution plan.",
        valueTooltipText:
            "Adaptive Query Execution is set to false by default in Spark 3.0. It applies if the query is not a streaming query and contains at least one exchange (usually when there’s a join, aggregate or window operator) or one subquery. We recommend setting it to true.",
    },
];

/**
 * Field descriptors for the per-node unused memory and unused core figures.
 * Both entries use the "plain-text" type and define no tooltips.
 */
export const unusedResourcesList: FieldModel[] = [
    { fieldName: "unusedMemoryPerNode", label: "Unused memory per node", type: "plain-text" },
    { fieldName: "unusedCoresPerNode", label: "Unused cores per node", type: "plain-text" },
];

/**
 * Field descriptors for additional Spark properties, each with a fixed
 * `value` string.
 *
 * `fieldName` and `label` both hold the full Spark property name; every
 * entry uses the "plain-text" type.
 */
export const recommendedConfiguration: FieldModel[] = [
    {
        fieldName: "spark.memory.fraction",
        label: "spark.memory.fraction",
        value: "0.8",
        type: "plain-text",
        tooltipText: "Fraction of JVM heap space used for Spark execution and storage.",
        valueTooltipText: "The lower this is, the more frequently spills and cached data eviction occur.",
    },
    {
        fieldName: "spark.scheduler.barrier.maxConcurrentTasksCheck.maxFailures",
        label: "spark.scheduler.barrier.maxConcurrentTasksCheck.maxFailures",
        value: "5",
        type: "plain-text",
        // The previous text described YARN executor failures, which is a
        // different setting from the property named in the label.
        tooltipText: "Number of max concurrent tasks check failures allowed before failing a job submission.",
    },
    {
        fieldName: "spark.rdd.compress",
        label: "spark.rdd.compress",
        value: "true",
        type: "plain-text",
        tooltipText: "Whether to compress serialized RDD partitions",
        valueTooltipText: "When set to true, this property can save substantial space at the cost of some extra CPU time by compressing the RDDs.",
    },
    {
        fieldName: "spark.shuffle.compress",
        label: "spark.shuffle.compress",
        value: "true",
        type: "plain-text",
        tooltipText: "Whether to compress the map output.",
        valueTooltipText: "When set to true, this property compresses the map output to save space.",
    },
    {
        fieldName: "spark.shuffle.spill.compress",
        label: "spark.shuffle.spill.compress",
        value: "true",
        type: "plain-text",
        tooltipText: "Whether to compress the data spilled during shuffles.",
        valueTooltipText: "When set to true, this property compresses the data spilled during shuffles.",
    },
    {
        fieldName: "spark.serializer",
        label: "spark.serializer",
        value: "org.apache.spark.serializer.KryoSerializer",
        type: "plain-text",
        tooltipText: "Class to use for serializing objects that will be sent over the network or need to be cached in serialized form.",
        valueTooltipText: "The default of Java serialization works with any Serializable Java object but is quite slow, so we recommend using org.apache.spark.serializer.KryoSerializer and configuring Kryo serialization when speed is necessary.",
    },
    {
        fieldName: "spark.executor.extraJavaOptions",
        label: "spark.executor.extraJavaOptions",
        // NOTE(review): G1SummarizeConcMark may not be accepted by newer
        // JVMs — verify against the JDK versions this tool targets.
        value: "-XX:+UseG1GC -XX:+G1SummarizeConcMark",
        type: "plain-text",
        tooltipText: "A string of extra JVM options to pass to executors.",
        valueTooltipText: "You can use multiple garbage collectors to evict the old objects and place the new ones into the memory. However, the latest Garbage First Garbage Collector (G1GC) overcomes the latency and throughput limitations with the old garbage collectors.",
    },
    {
        fieldName: "spark.driver.extraJavaOptions",
        label: "spark.driver.extraJavaOptions",
        // NOTE(review): same JVM-flag caveat as the executor options above.
        value: "-XX:+UseG1GC -XX:+G1SummarizeConcMark",
        type: "plain-text",
        tooltipText: "A string of extra JVM options to pass to the driver.",
        valueTooltipText: "You can use multiple garbage collectors to evict the old objects and place the new ones into the memory. However, the latest Garbage First Garbage Collector (G1GC) overcomes the latency and throughput limitations with the old garbage collectors.",
    },
];

/**
 * Field descriptors for the cluster-level resource figures (total and
 * available memory/cores, executors per node, memory per executor).
 * Every entry uses the "plain-text" type.
 */
export const clusterResourcesList: FieldModel[] = [
    {
        fieldName: "totalMem",
        label: "Total memory (GB)",
        type: "plain-text",
        tooltipText: "Total cluster memory.",
    },
    {
        fieldName: "overheadMemory",
        label: "Total overhead memory (GB)",
        type: "plain-text",
        tooltipText: "Total overhead memory in the cluster.",
    },
    {
        fieldName: "availableCores",
        label: "Total available cores",
        type: "plain-text",
        tooltipText: "Total cluster cores available for node containers.",
        valueTooltipText: "Leave 1 core per node for Hadoop/Yarn daemons.",
    },
    {
        fieldName: "availableMemory",
        label: "Total available memory (GB)",
        type: "plain-text",
        tooltipText: "Total cluster memory available for node containers.",
    },
    {
        // This entry has a value tooltip only; no tooltipText is defined.
        fieldName: "numberOfExecutorsPerNode",
        label: "Number of executors per node",
        type: "plain-text",
        valueTooltipText: "Number of executors per node = (total number of cores per node - 1) / spark.executor.cores",
    },
    {
        fieldName: "totalMemPerExecutor",
        label: "Memory per executor (GB)",
        type: "plain-text",
        tooltipText: "This total memory per executor includes the executor memory and overhead (spark.executor.memoryOverhead).",
        valueTooltipText: "Leave 1 GB for the Hadoop daemons.",
    },
];

/**
 * Field descriptors for the cluster size parameters: node count, cores per
 * node and memory per node. All three use the "number-input" type and
 * their labels end with " *".
 */
export const clusterSizeParamsList: FieldModel[] = [
    { fieldName: "numberOfNodes", label: "Number of nodes *", type: "number-input" },
    { fieldName: "numberOfCores", label: "Number of cores per node *", type: "number-input" },
    { fieldName: "memoryPerNode", label: "Memory (RAM) per node (GB) *", type: "number-input" },
];
/**
 * Field descriptors for the additional tuning parameters: parallelism per
 * core and memory overhead percentage. Both use the "number-input" type.
 */
export const additionalParamsList: FieldModel[] = [
    {
        fieldName: "parallelismPerCore",
        label: "Parallelism Per Core",
        type: "number-input",
        tooltipText:
            "The level of parallelism per allocated core. This field is used to determine the spark.default.parallelism configuration.",
        valueTooltipText:
            "We recommend setting this value to 2. It can be higher for a large cluster.",
    },
    {
        // No valueTooltipText is defined for this entry.
        fieldName: "memoryOverhead",
        label: "Memory overhead (%)",
        type: "number-input",
        tooltipText:
            "The percentage of memory in each executor that will be reserved for spark.executor.memoryOverhead.",
    },
];

/** Help text for the additional-parameters section. */
export const additionalParamsTooltip =
    "The following parameters help to fine-tune the overall optimized configuration. We recommend leaving them at their defaults.";

/** Help text for the recommendations heading. */
export const recommendationHeadingTooltip =
    "Although the following parameters are not required, they can help applications run smoothly and avoid timeout and memory-related errors. We advise that you set these in the spark-defaults configuration file.";
