Switch to unified view

a/sc2src/alsadirect.cpp b/sc2src/alsadirect.cpp
...
...
239
    double sum;
239
    double sum;
240
    int idx;
240
    int idx;
241
};
241
};
242
242
243
// Convert config parameter to libsamplerate converter type
243
// Convert config parameter to libsamplerate converter type
244
// Hopefully this will never include negative values, as we use -1 to mean
245
// "no conversion"
244
static int src_cvt_type(ConfSimple *config)
246
static int src_cvt_type(ConfSimple *config)
245
{
247
{
246
    int tp = SRC_SINC_FASTEST;
248
    int tp = SRC_SINC_FASTEST;
247
    if (!config)
249
    if (!config)
248
        return tp;
250
        return tp;
...
...
258
        tp = SRC_SINC_FASTEST;
260
        tp = SRC_SINC_FASTEST;
259
    } else if (!value.compare("SRC_ZERO_ORDER_HOLD")) {
261
    } else if (!value.compare("SRC_ZERO_ORDER_HOLD")) {
260
        tp = SRC_ZERO_ORDER_HOLD;
262
        tp = SRC_ZERO_ORDER_HOLD;
261
    } else if (!value.compare("SRC_LINEAR")) {
263
    } else if (!value.compare("SRC_LINEAR")) {
262
        tp = SRC_LINEAR;
264
        tp = SRC_LINEAR;
265
    } else if (!value.compare("NONE")) {
266
        tp = -1;
263
    } else {
267
    } else {
264
        // Allow numeric values for transparent expansion to
268
        // Allow numeric values for transparent expansion to
265
        // hypothetic libsamplerate updates (allowing this is explicit
269
        // hypothetic libsamplerate updates (allowing this is explicit
266
        // in the libsamplerate doc).
270
        // in the libsamplerate doc).
267
        long int lval;
271
        long int lval;
...
...
273
            LOGERR("Invalid converter type [" << value << 
277
            LOGERR("Invalid converter type [" << value << 
274
                   "] using SRCC_SINC_FASTEST" << endl);
278
                   "] using SRCC_SINC_FASTEST" << endl);
275
        }
279
        }
276
    }
280
    }
277
    return tp;
281
    return tp;
278
279
}
282
}
283
284
// Computing the samplerate conversion factor. We want to keep
285
// the queue at its target size to control the delay. The
286
// present hack sort of works but could probably benefit from
287
// a more scientific approach
288
static double compute_ratio(double& samplerate_ratio, int bufframes,
289
                            Filter& filter)
290
{
291
    // Integral term. We do not use it at the moment
292
    // double it = 0;
293
294
    double qs = 0.0;
295
    
296
    if (qinit) {
297
        // Qsize in frames. This is the variable to control
298
        qs = alsaqueue.qsize() * bufframes + alsadelay();
299
        // Error term
300
        double qstargframes = qstarg * bufframes;
301
        double et =  ((qstargframes - qs) / qstargframes);
302
303
        // Integral. Not used, made it worse each time I tried.
304
        // This is probably because our command is actually the
305
        // derivative of the error? I should try a derivative term
306
        // instead?
307
        // it += et;
308
309
        // Error correction coef
310
        double ce = 0.1;
311
312
        // Integral coef
313
        //double ci = 0.0001;
314
315
        // Compute command
316
        double adj = ce * et /* + ci * it*/;
317
318
        // Also tried a quadratic correction, worse.
319
        // double adj = et * ((et < 0) ? -et : et);
320
321
        // Computed ratio
322
        samplerate_ratio =  1.0 + adj;
323
324
        // Limit extension
325
        if (samplerate_ratio < 0.9) 
326
            samplerate_ratio = 0.9;
327
        if (samplerate_ratio > 1.1)
328
            samplerate_ratio = 1.1;
329
330
    } else {
331
        // Starting up, wait for more info
332
        qs = alsaqueue.qsize();
333
        samplerate_ratio = 1.0;
334
        // it = 0;
335
    }
336
337
    // Average the rate value to eliminate fast oscillations
338
    return filter(samplerate_ratio);
339
}
340
341
// Convert ints input buffer into floats for libsamplerate processing
342
// Data always comes in host order, because this is what we
343
// request from upstream. 24 and 32 bits are untested.
344
static bool fixToFloats(AudioMessage *tsk, SRC_DATA& src_data,
345
                        size_t tot_samples)
346
{
347
    switch (tsk->m_bits) {
348
    case 16: 
349
    {
350
        const short *sp = (const short *)tsk->m_buf;
351
        for (unsigned int i = 0; i < tot_samples; i++) {
352
            src_data.data_in[i] = *sp++;
353
        }
354
    }
355
    break;
356
    case 24: 
357
    {
358
        const unsigned char *icp = (const unsigned char *)tsk->m_buf;
359
        int o;
360
        unsigned char *ocp = (unsigned char *)&o;
361
        for (unsigned int i = 0; i < tot_samples; i++) {
362
            ocp[0] = *icp++;
363
            ocp[1] = *icp++;
364
            ocp[2] = *icp++;
365
            ocp[3] = (ocp[2] & 0x80) ? 0xff : 0;
366
            src_data.data_in[i] = o;
367
        }
368
    }
369
    break;
370
    case 32: 
371
    {
372
        const int *ip = (const int *)tsk->m_buf;
373
        for (unsigned int i = 0; i < tot_samples; i++) {
374
            src_data.data_in[i] = *ip++;
375
        }
376
    }
377
    break;
378
    default:
379
        LOGERR("audioEater:alsa: bad m_bits: " << tsk->m_bits << endl);
380
        return false;
381
    }
382
    return true;
383
}
384
385
// Convert floats buffer into output which is always 16LE for now. We
386
// should probably dither the lsb ?
387
// The libsamplerate output values can overshoot the input range (see
388
// http://www.mega-nerd.com/SRC/faq.html#Q001), so we take care to
389
// clip the values.
390
bool floatsToFix(AudioMessage *tsk, SRC_DATA& src_data,
391
                size_t tot_samples)
392
{
393
    short *sp = (short *)tsk->m_buf;
394
    switch (tsk->m_bits) {
395
    case 16:
396
        for (unsigned int i = 0; i < tot_samples; i++) {
397
            int v = src_data.data_out[i];
398
            if (v > 32767) {
399
                v = 32767;
400
            } else if (v < -32768) {
401
                v = -32768;
402
            }
403
            *sp++ = BSWAP16(short(v));
404
        }
405
    break;
406
    case 24:
407
        for (unsigned int i = 0; i < tot_samples; i++) {
408
            int v = src_data.data_out[i];
409
            if (v > (1 << 23) - 1) {
410
                v = (1 << 23) - 1;
411
            } else if (v < -(1 << 23)) {
412
                v = -(1 << 23);
413
            }
414
            *sp++ = BSWAP16(short(v >> 8));
415
        }
416
    break;
417
    case 32:
418
        for (unsigned int i = 0; i < tot_samples; i++) {
419
            float& f = src_data.data_out[i];
420
            int v = f;
421
            if (f > 0 && v < 0) {
422
                v = unsigned(1 << 31) - 1;
423
            } else if (f < 0 && v > 0) {
424
                v = -unsigned(1 << 31);
425
            }
426
            *sp++ = BSWAP16(short(v >> 16));
427
        }
428
    break;
429
    default:
430
        LOGERR("audioEater:alsa: bad m_bits: " << tsk->m_bits << endl);
431
        return false;
432
    }
433
    tsk->m_bytes = (char *)sp - tsk->m_buf;
434
    tsk->m_bits = 16;
435
    return true;
436
}
437
438
// Convert input buffer to 16le. Input samples are 16 bits or more, in
439
// host order. Data can only shrink, no allocation needed.
440
bool convert_to16le(AudioMessage *tsk)
441
{
442
    unsigned int tot_samples = tsk->samples();
443
    short *sp = (short *)tsk->m_buf;
444
    switch (tsk->m_bits) {
445
    case 16:
446
        for (unsigned int i = 0; i < tot_samples; i++) {
447
            short v = *sp;
448
            *sp++ = BSWAP16(v);
449
        }
450
    break;
451
    case 24:
452
    {
453
        const unsigned char *icp = (const unsigned char *)tsk->m_buf;
454
        for (unsigned int i = 0; i < tot_samples; i++) {
455
            int o;
456
            unsigned char *ocp = (unsigned char *)&o;
457
            ocp[0] = *icp++;
458
            ocp[1] = *icp++;
459
            ocp[2] = *icp++;
460
            ocp[3] = (ocp[2] & 0x80) ? 0xff : 0;
461
            *sp++ = BSWAP16(short(o >> 8));
462
        }
463
    }
464
    break;
465
    case 32:
466
    {
467
        const int *ip = (const int *)tsk->m_buf;
468
        for (unsigned int i = 0; i < tot_samples; i++) {
469
            *sp++ = BSWAP16(short((*ip++) >> 16));
470
        }
471
    }
472
    break;
473
    default:
474
        LOGERR("audioEater:alsa: bad m_bits: " << tsk->m_bits << endl);
475
        return false;
476
    }
477
    tsk->m_bytes = (char *)sp - tsk->m_buf;
478
    tsk->m_bits = 16;
479
    return true;
480
}
481
482
// Complete input processing:
483
// - compute samplerate conversion factor,
484
// - convert input to float
485
// - apply conversion
486
// - Convert back to int16le
487
bool stretch_buffer(AudioMessage *tsk,
488
                    SRC_STATE * src_state, SRC_DATA& src_data,
489
                    size_t& src_input_bytes, Filter& filter)
490
{
491
    // Number of frames per buffer. This is mostly constant for a
492
    // given stream (depends on fe and buffer time, Windows Songcast
493
    // buffers are 10mS, so 441 frames at cd q). Recomputed on first
494
    // buf, the init is to avoid warnings
495
    int bufframes = tsk->frames();
496
497
    double samplerate_ratio = 1.0;
498
    unsigned int tot_samples = tsk->samples();
499
500
    // Compute sample rate ratio, and return current qsize in
501
    // frames. This is the variable which we control.
502
    double qs = compute_ratio(samplerate_ratio, bufframes, filter);
503
504
    src_data.input_frames = tsk->frames();
505
506
    // Possibly reallocate buffer
507
    size_t needed_bytes = tot_samples * sizeof(float);
508
    if (src_input_bytes < needed_bytes) {
509
        src_data.data_in =
510
            (float *)realloc(src_data.data_in, needed_bytes);
511
        src_data.data_out = (float *)realloc(src_data.data_out,
512
                                             2 * needed_bytes);
513
        src_data.output_frames = 2 * tot_samples / tsk->m_chans;
514
        src_input_bytes = needed_bytes;
515
    }
516
517
    src_data.src_ratio = samplerate_ratio;
518
    src_data.end_of_input = 0;
519
520
    // Convert to floats
521
    if (!fixToFloats(tsk, src_data, tot_samples)) {
522
        return false;
523
    }
524
525
    // Call samplerate converter
526
    int ret = src_process(src_state, &src_data);
527
    if (ret) {
528
        LOGERR("src_process: " << src_strerror(ret) << endl);
529
        return false;
530
    }
531
532
    { // Tell the world
533
        static int cnt;
534
        if (cnt++ == 103) {
535
            LOGDEB("audioEater:alsa: " 
536
                   " qstarg " << qstarg <<
537
                   " iqsz " << alsaqueue.qsize() <<
538
                   " qsize " << int(qs/bufframes) << 
539
                   " ratio " << samplerate_ratio <<
540
                   " in " << src_data.input_frames << 
541
                   " consumed " << src_data.input_frames_used << 
542
                   " out " << src_data.output_frames_gen << endl);
543
            cnt = 0;
544
        }
545
    }
546
547
    // New number of samples after conversion. We are going to
548
    // copy them back to the audio buffer, and may need to
549
    // reallocate it.
550
    tot_samples =  src_data.output_frames_gen * tsk->m_chans;
551
    needed_bytes = tot_samples * (tsk->m_bits / 8);
552
    if (tsk->m_allocbytes < needed_bytes) {
553
        tsk->m_allocbytes = needed_bytes;
554
        tsk->m_buf = (char *)realloc(tsk->m_buf, tsk->m_allocbytes);
555
        if (!tsk->m_buf) {
556
            LOGERR("audioEater:alsa: out of memory\n");
557
            return false;
558
        }
559
    }
560
561
    if (!floatsToFix(tsk, src_data, tot_samples)) {
562
        return false;
563
    }
564
    return true;
565
}
566
280
static void *audioEater(void *cls)
567
static void *audioEater(void *cls)
281
{
568
{
282
    AudioEater::Context *ctxt = (AudioEater::Context*)cls;
569
    AudioEater::Context *ctxt = (AudioEater::Context*)cls;
283
570
284
    int cvt_type = src_cvt_type(ctxt->config);
571
    int cvt_type = src_cvt_type(ctxt->config);
...
...
293
    delete ctxt;
580
    delete ctxt;
294
    ctxt = 0;
581
    ctxt = 0;
295
582
296
    qinit = false;
583
    qinit = false;
297
584
298
    double samplerate_ratio = 1.0;
299
    Filter filter;
585
    Filter filter;
300
586
301
    int src_error = 0;
587
    int src_error = 0;
302
    SRC_STATE *src_state = 0;
588
    SRC_STATE *src_state = 0;
303
    SRC_DATA src_data;
589
    SRC_DATA src_data;
...
...
306
    // twice the size for output (allocated on first use).
592
    // twice the size for output (allocated on first use).
307
    size_t src_input_bytes = 0;
593
    size_t src_input_bytes = 0;
308
    
594
    
309
    alsaqueue.start(1, alsawriter, 0);
595
    alsaqueue.start(1, alsawriter, 0);
310
596
311
    // Integral term. We do not use it at the moment
312
    // double it = 0;
313
314
    // Number of frames per buffer. This is mostly constant for a
315
    // given stream (depends on fe and buffer time, Windows Songcast
316
    // buffers are 10mS, so 441 frames at cd q). Recomputed on first
317
    // buf, the init is to avoid warnings
318
    int bufframes = 441;
319
320
    while (true) {
597
    while (true) {
321
        AudioMessage *tsk = 0;
598
        AudioMessage *tsk = 0;
322
        size_t qsz;
599
        size_t qsz;
323
        if (!queue->take(&tsk, &qsz)) {
600
        if (!queue->take(&tsk, &qsz)) {
324
            LOGDEB("audioEater: alsadirect: queue take failed\n");
601
            LOGDEB("audioEater: alsadirect: queue take failed\n");
...
...
330
        if (tsk->m_bytes == 0 || tsk->m_chans == 0 || tsk->m_bits == 0) {
607
        if (tsk->m_bytes == 0 || tsk->m_chans == 0 || tsk->m_bits == 0) {
331
            LOGDEB("Zero buf\n");
608
            LOGDEB("Zero buf\n");
332
            continue;
609
            continue;
333
        }
610
        }
334
611
612
        // 1st time: init
335
        if (src_state == 0) {
613
        if (src_state == 0) {
336
            if (!alsa_init(alsadevice, tsk)) {
614
            if (!alsa_init(alsadevice, tsk)) {
337
                alsaqueue.setTerminateAndWait();
615
                alsaqueue.setTerminateAndWait();
338
                queue->workerExit();
616
                queue->workerExit();
339
                return (void *)1;
617
                return (void *)1;
340
            }
618
            }
341
            // BEST_QUALITY yields approx 25% cpu on a core i7
619
            if (cvt_type != -1) {
342
            // 4770T. Obviously too much, actually might not be
343
            // sustainable (it's almost 100% of 1 cpu)
344
            // MEDIUM_QUALITY is around 10%
345
            // FASTEST is 4-5%. Given that this measured for the full
346
            // process, probably a couple % for the conversion in fact.
347
            // Rpi: FASTEST is 30% CPU on a Pi2 with USB
348
            // audio. Curiously it's 25-30% on a Pi1 with i2s audio.
349
            src_state = src_new(cvt_type, tsk->m_chans, &src_error);
620
                src_state = src_new(cvt_type, tsk->m_chans, &src_error);
350
621
            } else {
351
            bufframes = tsk->frames();
622
                src_state = (SRC_STATE *)malloc(1);
623
            }
352
        }
624
        }
353
        
625
        
354
        // Computing the samplerate conversion factor. We want to keep
626
        // Process input buffer
355
        // the queue at its target size to control the delay. The
627
        if (cvt_type != -1) {
356
        // present hack sort of works but could probably benefit from
628
            if (!stretch_buffer(tsk, src_state, src_data, src_input_bytes,
357
        // a more scientific approach
629
                               filter)) {
358
359
        // Qsize in frames. This is the variable to control
360
        double qs;
361
362
        if (qinit) {
363
            qs = alsaqueue.qsize() * bufframes + alsadelay();
364
            // Error term
365
            double qstargframes = qstarg * bufframes;
366
            double et =  ((qstargframes - qs) / qstargframes);
367
368
            // Integral. Not used, made it worse each time I tried.
369
            // This is probably because our command is actually the
370
            // derivative of the error? I should try a derivative term
371
            // instead?
372
            // it += et;
373
374
            // Error correction coef
375
            double ce = 0.1;
376
377
            // Integral coef
378
            //double ci = 0.0001;
379
380
            // Compute command
381
            double adj = ce * et /* + ci * it*/;
382
383
            // Also tried a quadratic correction, worse.
384
            // double adj = et * ((et < 0) ? -et : et);
385
386
            // Computed ratio
387
            samplerate_ratio =  1.0 + adj;
388
389
            // Limit extension
390
            if (samplerate_ratio < 0.9) 
391
                samplerate_ratio = 0.9;
392
            if (samplerate_ratio > 1.1)
393
                samplerate_ratio = 1.1;
394
395
        } else {
396
            // Starting up, wait for more info
397
            qs = alsaqueue.qsize();
398
            samplerate_ratio = 1.0;
399
            // it = 0;
400
        }
401
402
        // Average the rate value to eliminate fast oscillations
403
        samplerate_ratio = filter(samplerate_ratio);
404
405
        unsigned int tot_samples = tsk->samples();
406
        src_data.input_frames = tsk->frames();
407
        size_t needed_bytes = tot_samples * sizeof(float);
408
        if (src_input_bytes < needed_bytes) {
409
            src_data.data_in = (float *)realloc(src_data.data_in, needed_bytes);
410
            src_data.data_out = (float *)realloc(src_data.data_out,
411
                                                 2 * needed_bytes);
412
            src_data.output_frames = 2 * tot_samples / tsk->m_chans;
413
            src_input_bytes = needed_bytes;
414
        }
415
416
        src_data.src_ratio = samplerate_ratio;
417
        src_data.end_of_input = 0;
418
        
419
        // Data always comes in host order, because this is what we
420
        // request from upstream. 24 and 32 bits are untested.
421
        switch (tsk->m_bits) {
422
        case 16: 
423
        {
424
            const short *sp = (const short *)tsk->m_buf;
425
            for (unsigned int i = 0; i < tot_samples; i++) {
426
                src_data.data_in[i] = *sp++;
427
            }
428
        }
429
        break;
430
        case 24: 
431
        {
432
            const unsigned char *icp = (const unsigned char *)tsk->m_buf;
433
            int o;
434
            unsigned char *ocp = (unsigned char *)&o;
435
            for (unsigned int i = 0; i < tot_samples; i++) {
436
                ocp[0] = *icp++;
437
                ocp[1] = *icp++;
438
                ocp[2] = *icp++;
439
                ocp[3] = (ocp[2] & 0x80) ? 0xff : 0;
440
                src_data.data_in[i] = o;
441
            }
442
        }
443
        break;
444
        case 32: 
445
        {
446
            const int *ip = (const int *)tsk->m_buf;
447
            for (unsigned int i = 0; i < tot_samples; i++) {
448
                src_data.data_in[i] = *ip++;
449
            }
450
        }
451
        break;
452
        default:
453
            LOGERR("audioEater:alsa: bad m_bits: " << tsk->m_bits << endl);
454
            alsaqueue.setTerminateAndWait();
455
            queue->workerExit();
456
            return (void *)1;
457
        }
458
459
        int ret = src_process(src_state, &src_data);
460
        if (ret) {
461
            LOGERR("src_process: " << src_strerror(ret) << endl);
462
            continue;
463
        }
464
465
        {
466
            static int cnt;
467
            if (cnt++ == 103) {
468
                LOGDEB("audioEater:alsa: " 
469
                       " qstarg " << qstarg <<
470
                       " iqsz " << alsaqueue.qsize() <<
471
                       " qsize " << int(qs/bufframes) << 
472
                       " ratio " << samplerate_ratio <<
473
                       " in " << src_data.input_frames << 
474
                       " consumed " << src_data.input_frames_used << 
475
                       " out " << src_data.output_frames_gen << endl);
476
                cnt = 0;
477
            }
478
        }
479
480
        // New number of samples after conversion. We are going to
481
        // copy them back to the audio buffer, and may need to
482
        // reallocate it.
483
        tot_samples =  src_data.output_frames_gen * tsk->m_chans;
484
        needed_bytes = tot_samples * (tsk->m_bits / 8);
485
        if (tsk->m_allocbytes < needed_bytes) {
486
            tsk->m_allocbytes = needed_bytes;
487
            tsk->m_buf = (char *)realloc(tsk->m_buf, tsk->m_allocbytes);
488
            if (!tsk->m_buf) {
489
                LOGERR("audioEater:alsa: out of memory\n");
490
                alsaqueue.setTerminateAndWait();
630
                alsaqueue.setTerminateAndWait();
491
                queue->workerExit();
631
                queue->workerExit();
492
                return (void *)1;
632
                return (void *)1;
493
            }
633
            }
494
        }
634
        } else {
495
635
            convert_to16le(tsk);
496
        // Convert floats buffer into output which is always 16LE for
497
        // now. We should probably dither the lsb ?
498
  // The libsamplerate output values can overshoot the input range 
499
  // (see http://www.mega-nerd.com/SRC/faq.html#Q001), so we take care 
500
  // to clip the values.
501
        {
636
        }
502
            short *sp = (short *)tsk->m_buf;
503
            switch (tsk->m_bits) {
504
            case 16:
505
            {
506
                for (unsigned int i = 0; i < tot_samples; i++) {
507
                    int v = src_data.data_out[i];
508
                    if (v > 32767) {
509
                        v = 32767;
510
                    } else if (v < -32768) {
511
                        v = -32768;
512
                    }
513
                    *sp++ = BSWAP16(short(v));
514
                }
515
            }
516
            break;
517
            case 24:
518
            {
519
                for (unsigned int i = 0; i < tot_samples; i++) {
520
                    int v = src_data.data_out[i];
521
                    if (v > (1 << 23) - 1) {
522
                        v = (1 << 23) - 1;
523
                    } else if (v < -(1 << 23)) {
524
                        v = -(1 << 23);
525
                    }
526
                    *sp++ = BSWAP16(short(v >> 8));
527
                }
528
            }
529
            break;
530
            case 32:
531
            {
532
                for (unsigned int i = 0; i < tot_samples; i++) {
533
                    float& f = src_data.data_out[i];
534
                    int v = f;
535
                    if (f > 0 && v < 0) {
536
                        v = unsigned(1 << 31) - 1;
537
                    } else if (f < 0 && v > 0) {
538
                        v = -unsigned(1 << 31);
539
                    }
540
                    *sp++ = BSWAP16(short(v >> 16));
541
                }
542
            }
543
            break;
544
            default:
545
                LOGERR("audioEater:alsa: bad m_bits: " << tsk->m_bits << endl);
546
                alsaqueue.setTerminateAndWait();
547
                queue->workerExit();
548
                return (void *)1;
549
            }
550
            tsk->m_bytes = (char *)sp - tsk->m_buf;
551
        }
552
        tsk->m_bits = 16;
553
637
638
        // Send data on its way
554
        if (!alsaqueue.put(tsk)) {
639
        if (!alsaqueue.put(tsk)) {
555
            LOGERR("alsaEater: queue put failed\n");
640
            LOGERR("alsaEater: queue put failed\n");
556
            queue->workerExit();
641
            queue->workerExit();
557
            return (void *)1;
642
            return (void *)1;
558
        }
643
        }